blob: 1aa53e1ef95a090192a6c637c31988bec81dc953 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL
; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-predicate-over-epilogue=scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP
define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src2, ptr noalias %dst, i1 %c1, i1 %c2, i1 %c3) {
; IF-EVL-LABEL: define void @test(
; IF-EVL-SAME: i64 [[N:%.*]], ptr noalias [[SRC0:%.*]], ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[DST:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT: [[ENTRY:.*:]]
; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C1]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: [[TMP2:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; IF-EVL-NEXT: [[TMP0:%.*]] = xor i1 [[C2]], true
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[TMP0]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: [[TMP1:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
; IF-EVL-NEXT: [[TMP3:%.*]] = select <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[TMP4:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT]], [[TMP3]]
; IF-EVL-NEXT: [[TMP5:%.*]] = xor <vscale x 4 x i1> [[TMP1]], splat (i1 true)
; IF-EVL-NEXT: [[TMP6:%.*]] = select <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], i32 [[TMP7]])
; IF-EVL-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[VP_OP_LOAD]]
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[TMP4]], i32 [[TMP7]])
; IF-EVL-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD7]], [[PREDPHI]]
; IF-EVL-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[TMP4]], [[TMP6]]
; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[PREDPHI8:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[BROADCAST_SPLAT4]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i32> poison)
; IF-EVL-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[PREDPHI8]]
; IF-EVL-NEXT: [[PREDPHI9:%.*]] = select i1 [[C3]], <vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[PREDPHI8]]
; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI9]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP15]])
; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP7]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br label %[[EXIT:.*]]
; IF-EVL: [[EXIT]]:
; IF-EVL-NEXT: ret void
;
; NO-VP-LABEL: define void @test(
; NO-VP-SAME: i64 [[N:%.*]], ptr noalias [[SRC0:%.*]], ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[DST:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT: [[ENTRY:.*]]:
; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NO-VP: [[VECTOR_PH]]:
; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C1]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[TMP6:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
; NO-VP-NEXT: [[TMP4:%.*]] = xor i1 [[C2]], true
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[TMP4]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[TMP5:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]
; NO-VP-NEXT: [[TMP7:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer
; NO-VP-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[TMP7]]
; NO-VP-NEXT: [[TMP9:%.*]] = xor <vscale x 4 x i1> [[TMP5]], splat (i1 true)
; NO-VP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> zeroinitializer
; NO-VP-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i1> [[TMP8]], [[TMP10]]
; NO-VP-NEXT: [[TMP12:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], <vscale x 4 x i1> zeroinitializer
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> poison)
; NO-VP-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]]
; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
; NO-VP-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD5]], [[PREDPHI]]
; NO-VP-NEXT: [[PREDPHI6:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP15]]
; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> poison)
; NO-VP-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD7]], [[PREDPHI6]]
; NO-VP-NEXT: [[PREDPHI8:%.*]] = select i1 [[C3]], <vscale x 4 x i32> [[TMP17]], <vscale x 4 x i32> [[PREDPHI6]]
; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; NO-VP-NEXT: store <vscale x 4 x i32> [[PREDPHI8]], ptr [[TMP18]], align 4
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP: [[MIDDLE_BLOCK]]:
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; NO-VP: [[SCALAR_PH]]:
; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT: br label %[[LOOP:.*]]
; NO-VP: [[LOOP]]:
; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
; NO-VP-NEXT: br i1 [[C1]], label %[[LOAD_V0:.*]], label %[[CHECK_COND1:.*]]
; NO-VP: [[CHECK_COND1]]:
; NO-VP-NEXT: [[NOT_C2:%.*]] = xor i1 [[C2]], true
; NO-VP-NEXT: [[COND1:%.*]] = or i1 [[C1]], [[NOT_C2]]
; NO-VP-NEXT: br i1 [[COND1]], label %[[LOAD_V1:.*]], label %[[LOAD_V2_CHECK:.*]]
; NO-VP: [[LOAD_V0]]:
; NO-VP-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[SRC0]], i64 [[IV]]
; NO-VP-NEXT: [[V0:%.*]] = load i32, ptr [[GEP0]], align 4
; NO-VP-NEXT: br label %[[LOAD_V1]]
; NO-VP: [[LOAD_V1]]:
; NO-VP-NEXT: [[VAL0:%.*]] = phi i32 [ [[V0]], %[[LOAD_V0]] ], [ 0, %[[CHECK_COND1]] ]
; NO-VP-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[IV]]
; NO-VP-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1]], align 4
; NO-VP-NEXT: [[VAL1:%.*]] = add i32 [[V1]], [[VAL0]]
; NO-VP-NEXT: br label %[[LOAD_V2_CHECK]]
; NO-VP: [[LOAD_V2_CHECK]]:
; NO-VP-NEXT: [[VAL2:%.*]] = phi i32 [ [[VAL1]], %[[LOAD_V1]] ], [ 0, %[[CHECK_COND1]] ]
; NO-VP-NEXT: br i1 [[C3]], label %[[LOAD_V2:.*]], label %[[LATCH]]
; NO-VP: [[LOAD_V2]]:
; NO-VP-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[IV]]
; NO-VP-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2]], align 4
; NO-VP-NEXT: [[VAL3:%.*]] = add i32 [[V2]], [[VAL2]]
; NO-VP-NEXT: br label %[[LATCH]]
; NO-VP: [[LATCH]]:
; NO-VP-NEXT: [[RESULT:%.*]] = phi i32 [ [[VAL3]], %[[LOAD_V2]] ], [ [[VAL2]], %[[LOAD_V2_CHECK]] ]
; NO-VP-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; NO-VP-NEXT: store i32 [[RESULT]], ptr [[OUT]], align 4
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-VP: [[EXIT]]:
; NO-VP-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
br i1 %c1, label %load.v0, label %check.cond1
check.cond1:
%not.c2 = xor i1 %c2, true
%cond1 = or i1 %c1, %not.c2
br i1 %cond1, label %load.v1, label %load.v2.check
load.v0:
%gep0 = getelementptr inbounds i32, ptr %src0, i64 %iv
%v0 = load i32, ptr %gep0, align 4
br label %load.v1
load.v1:
%val0 = phi i32 [ %v0, %load.v0 ], [ 0, %check.cond1 ]
%gep1 = getelementptr inbounds i32, ptr %src1, i64 %iv
%v1 = load i32, ptr %gep1, align 4
%val1 = add i32 %v1, %val0
br label %load.v2.check
load.v2.check:
%val2 = phi i32 [ %val1, %load.v1 ], [ 0, %check.cond1 ]
br i1 %c3, label %load.v2, label %latch
load.v2:
%gep2 = getelementptr inbounds i32, ptr %src2, i64 %iv
%v2 = load i32, ptr %gep2, align 4
%val3 = add i32 %v2, %val2
br label %latch
latch:
%result = phi i32 [ %val3, %load.v2 ], [ %val2, %load.v2.check ]
%out = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 %result, ptr %out, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}