| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -p loop-vectorize -force-vector-width=4 -mtriple riscv64 -mattr=+v -scalable-vectorization=on -S %s | FileCheck %s |
| |
| ; Select-of-induction reduction where the selected value is the i64 induction |
| ; variable truncated to i32. The scalar loop below walks %src from index 0, and |
| ; whenever the loaded i32 equals zero it records the (truncated) index; the |
| ; value recorded by the final iteration's select is returned. |
| ; |
| ; The expected output is an EVL-based vector loop (get.vector.length + vp.load) |
| ; that keeps both the candidate index vector and a mask of matching lanes, and |
| ; extracts the result in the middle block with |
| ; llvm.experimental.vector.extract.last.active, using 0 (the reduction's start |
| ; value) as the fallback operand. |
| define i32 @find_last_trunc_iv(ptr %src, i64 %n) { |
| ; CHECK-LABEL: define i32 @find_last_trunc_iv( |
| ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[LOOP:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() |
| ; CHECK-NEXT: br label %[[VECTOR_BODY1:.*]] |
| ; CHECK: [[VECTOR_BODY1]]: |
| ; CHECK-NEXT: [[TMP22:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY1]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY1]] ] |
| ; CHECK-NEXT: [[TMP2:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY1]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP1]], %[[VECTOR_BODY]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY1]] ] |
| ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_BODY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY1]] ] |
| ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) |
| ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP3]] to i64 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP3]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP22]] |
| ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP3]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer, i32 [[TMP3]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = freeze <vscale x 4 x i1> [[TMP6]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP7]]) |
| ; CHECK-NEXT: [[TMP9]] = select i1 [[TMP8]], <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP2]] |
| ; CHECK-NEXT: [[TMP10]] = select i1 [[TMP8]], <vscale x 4 x i32> [[VEC_IND]], <vscale x 4 x i32> [[VEC_PHI]] |
| ; CHECK-NEXT: [[CURRENT_ITERATION_NEXT]] = add i64 [[TMP11]], [[TMP22]] |
| ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 |
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> [[TMP10]], <vscale x 4 x i1> [[TMP9]], i32 0) |
| ; CHECK-NEXT: br label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] |
| ; |
| entry: |
| br label %loop |
| |
| ; Scalar input loop: %iv counts up from 0 by 1, %rdx starts at 0. |
| ; The select keeps the truncated %iv when the loaded element is zero and the |
| ; previous %rdx otherwise. |
| ; The exit compare tests the pre-increment %iv against %n, so the iteration |
| ; with %iv == %n still executes before the loop exits. |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] |
| %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv |
| %l = load i32, ptr %gep.src |
| %cmp103 = icmp eq i32 %l, 0 |
| %0 = trunc i64 %iv to i32 |
| %rdx.next = select i1 %cmp103, i32 %0, i32 %rdx |
| %iv.next = add i64 %iv, 1 |
| %ec = icmp eq i64 %iv, %n |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret i32 %rdx.next |
| } |
| |
| ; Select-of-induction reduction over a decreasing induction variable, with a |
| ; reduction start value (%rdx.start) that is a function argument rather than a |
| ; constant. The scalar loop below counts %iv down from %n and, whenever |
| ; a[iv - 1] > b[iv - 1] (signed), records iv - 1; otherwise it keeps the |
| ; previous reduction value. |
| ; |
| ; The expected output is an EVL-based vector loop that loads both arrays with |
| ; vp.load + vp.reverse, merges matching lanes of the induction vector into an |
| ; accumulator initialised to splat(i64 max), and tracks in a separate i1 |
| ; vector whether any lane ever matched. The middle block takes the signed |
| ; minimum of the accumulator, subtracts one, and selects between that value and |
| ; %rdx.start based on the or-reduction of the match mask. |
| define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { |
| ; CHECK-LABEL: define i64 @select_decreasing_induction_icmp_non_const_start( |
| ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 |
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 1) |
| ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[UMIN]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[N]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP2]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP3]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP1]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw i64 0, [[TMP5]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] |
| ; CHECK-NEXT: [[TMP38:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 |
| ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP38]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = sub nuw nsw i64 [[TMP5]], 1 |
| ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 0, [[TMP9]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP10]] |
| ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr align 8 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP38]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP6]], i64 [[TMP10]] |
| ; CHECK-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr align 8 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt <vscale x 4 x i64> [[TMP12]], [[TMP15]] |
| ; CHECK-NEXT: [[TMP18]] = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i64> [[VEC_IND]], <vscale x 4 x i64> [[VEC_PHI]], i32 [[TMP4]]) |
| ; CHECK-NEXT: [[TMP19]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI1]], i32 [[TMP4]]) |
| ; CHECK-NEXT: [[CURRENT_ITERATION_NEXT]] = add i64 [[TMP5]], [[INDEX]] |
| ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT3]] |
| ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 |
| ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[TMP53:%.*]] = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> [[TMP18]]) |
| ; CHECK-NEXT: [[TMP54:%.*]] = add nsw i64 [[TMP53]], -1 |
| ; CHECK-NEXT: [[TMP56:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP19]]) |
| ; CHECK-NEXT: [[TMP57:%.*]] = freeze i1 [[TMP56]] |
| ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP57]], i64 [[TMP54]], i64 [[RDX_START]] |
| ; CHECK-NEXT: br label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret i64 [[RDX_SELECT]] |
| ; |
| entry: |
| br label %loop |
| |
| ; Scalar input loop: %iv starts at %n and is decremented first, so both loads |
| ; and the recorded index use %iv.next (= %iv - 1). |
| ; The loop continues while the pre-decrement %iv is unsigned-greater than 1. |
| loop: |
| %iv = phi i64 [ %iv.next, %loop ], [ %n, %entry ] |
| %rdx = phi i64 [ %cond, %loop ], [ %rdx.start, %entry ] |
| %iv.next = add nsw i64 %iv, -1 |
| %gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv.next |
| %ld.a = load i64, ptr %gep.a.iv, align 8 |
| %gep.b.iv = getelementptr inbounds i64, ptr %b, i64 %iv.next |
| %ld.b = load i64, ptr %gep.b.iv, align 8 |
| %cmp.a.b = icmp sgt i64 %ld.a, %ld.b |
| %cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx |
| %exit.cond = icmp ugt i64 %iv, 1 |
| br i1 %exit.cond, label %loop, label %exit |
| |
| exit: |
| ret i64 %cond |
| } |