| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -passes=loop-vectorize \ |
| ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL |
| |
| ; RUN: opt -passes=loop-vectorize \ |
| ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue \ |
| ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP |
| |
| ; Vectorizer input: a counted loop over %iv in [0, %n) whose body is guarded by |
| ; the loop-invariant conditions %c1, %c2 and %c3 (all function arguments). |
| ; Depending on those conditions it loads from %src0, %src1 and %src2, adds the |
| ; loaded values together, and stores the sum to %dst[%iv] on every iteration. |
| ; The autogenerated check lines inside the function record the loop-vectorize |
| ; output for each of the two RUN configurations of this file. |
| define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src2, ptr noalias %dst, i1 %c1, i1 %c2, i1 %c3) { |
| ; IF-EVL-LABEL: define void @test( |
| ; IF-EVL-SAME: i64 [[N:%.*]], ptr noalias [[SRC0:%.*]], ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[DST:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; IF-EVL-NEXT: [[ENTRY:.*:]] |
| ; IF-EVL-NEXT: br label %[[VECTOR_PH:.*]] |
| ; IF-EVL: [[VECTOR_PH]]: |
| ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C1]], i64 0 |
| ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP2:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) |
| ; IF-EVL-NEXT: [[TMP0:%.*]] = xor i1 [[C2]], true |
| ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[TMP0]], i64 0 |
| ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP1:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] |
| ; IF-EVL-NEXT: [[TMP3:%.*]] = select <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP4:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT]], [[TMP3]] |
| ; IF-EVL-NEXT: [[TMP5:%.*]] = xor <vscale x 4 x i1> [[TMP1]], splat (i1 true) |
| ; IF-EVL-NEXT: [[TMP6:%.*]] = select <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer |
| ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0 |
| ; IF-EVL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; IF-EVL: [[VECTOR_BODY]]: |
| ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; IF-EVL-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) |
| ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0 |
| ; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32() |
| ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT6]] |
| ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[EVL_BASED_IV]] |
| ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], i32 [[TMP7]]) |
| ; IF-EVL-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[VP_OP_LOAD]] |
| ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[EVL_BASED_IV]] |
| ; IF-EVL-NEXT: [[VP_OP_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[TMP4]], i32 [[TMP7]]) |
| ; IF-EVL-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD7]], [[PREDPHI]] |
| ; IF-EVL-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[TMP4]], [[TMP6]] |
| ; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer |
| ; IF-EVL-NEXT: [[PREDPHI8:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP12]] |
| ; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[BROADCAST_SPLAT4]], <vscale x 4 x i1> zeroinitializer |
| ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[EVL_BASED_IV]] |
| ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i32> poison) |
| ; IF-EVL-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[PREDPHI8]] |
| ; IF-EVL-NEXT: [[PREDPHI9:%.*]] = select i1 [[C3]], <vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[PREDPHI8]] |
| ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[EVL_BASED_IV]] |
| ; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI9]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP15]]) |
| ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP7]] to i64 |
| ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] |
| ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] |
| ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 |
| ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; IF-EVL: [[MIDDLE_BLOCK]]: |
| ; IF-EVL-NEXT: br label %[[EXIT:.*]] |
| ; IF-EVL: [[EXIT]]: |
| ; IF-EVL-NEXT: ret void |
| ; |
| ; NO-VP-LABEL: define void @test( |
| ; NO-VP-SAME: i64 [[N:%.*]], ptr noalias [[SRC0:%.*]], ptr noalias [[SRC1:%.*]], ptr noalias [[SRC2:%.*]], ptr noalias [[DST:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; NO-VP-NEXT: [[ENTRY:.*]]: |
| ; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 |
| ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] |
| ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; NO-VP: [[VECTOR_PH]]: |
| ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| ; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 |
| ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] |
| ; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] |
| ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0 |
| ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C1]], i64 0 |
| ; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; NO-VP-NEXT: [[TMP6:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) |
| ; NO-VP-NEXT: [[TMP4:%.*]] = xor i1 [[C2]], true |
| ; NO-VP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[TMP4]], i64 0 |
| ; NO-VP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer |
| ; NO-VP-NEXT: [[TMP5:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]] |
| ; NO-VP-NEXT: [[TMP7:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer |
| ; NO-VP-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[TMP7]] |
| ; NO-VP-NEXT: [[TMP9:%.*]] = xor <vscale x 4 x i1> [[TMP5]], splat (i1 true) |
| ; NO-VP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> zeroinitializer |
| ; NO-VP-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i1> [[TMP8]], [[TMP10]] |
| ; NO-VP-NEXT: [[TMP12:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], <vscale x 4 x i1> zeroinitializer |
| ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; NO-VP: [[VECTOR_BODY]]: |
| ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[INDEX]] |
| ; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> poison) |
| ; NO-VP-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] |
| ; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[INDEX]] |
| ; NO-VP-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison) |
| ; NO-VP-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD5]], [[PREDPHI]] |
| ; NO-VP-NEXT: [[PREDPHI6:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP15]] |
| ; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[INDEX]] |
| ; NO-VP-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> poison) |
| ; NO-VP-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD7]], [[PREDPHI6]] |
| ; NO-VP-NEXT: [[PREDPHI8:%.*]] = select i1 [[C3]], <vscale x 4 x i32> [[TMP17]], <vscale x 4 x i32> [[PREDPHI6]] |
| ; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] |
| ; NO-VP-NEXT: store <vscale x 4 x i32> [[PREDPHI8]], ptr [[TMP18]], align 4 |
| ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] |
| ; NO-VP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; NO-VP-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; NO-VP: [[MIDDLE_BLOCK]]: |
| ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] |
| ; NO-VP-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] |
| ; NO-VP: [[SCALAR_PH]]: |
| ; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] |
| ; NO-VP-NEXT: br label %[[LOOP:.*]] |
| ; NO-VP: [[LOOP]]: |
| ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] |
| ; NO-VP-NEXT: br i1 [[C1]], label %[[LOAD_V0:.*]], label %[[CHECK_COND1:.*]] |
| ; NO-VP: [[CHECK_COND1]]: |
| ; NO-VP-NEXT: [[NOT_C2:%.*]] = xor i1 [[C2]], true |
| ; NO-VP-NEXT: [[COND1:%.*]] = or i1 [[C1]], [[NOT_C2]] |
| ; NO-VP-NEXT: br i1 [[COND1]], label %[[LOAD_V1:.*]], label %[[LOAD_V2_CHECK:.*]] |
| ; NO-VP: [[LOAD_V0]]: |
| ; NO-VP-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[SRC0]], i64 [[IV]] |
| ; NO-VP-NEXT: [[V0:%.*]] = load i32, ptr [[GEP0]], align 4 |
| ; NO-VP-NEXT: br label %[[LOAD_V1]] |
| ; NO-VP: [[LOAD_V1]]: |
| ; NO-VP-NEXT: [[VAL0:%.*]] = phi i32 [ [[V0]], %[[LOAD_V0]] ], [ 0, %[[CHECK_COND1]] ] |
| ; NO-VP-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[IV]] |
| ; NO-VP-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1]], align 4 |
| ; NO-VP-NEXT: [[VAL1:%.*]] = add i32 [[V1]], [[VAL0]] |
| ; NO-VP-NEXT: br label %[[LOAD_V2_CHECK]] |
| ; NO-VP: [[LOAD_V2_CHECK]]: |
| ; NO-VP-NEXT: [[VAL2:%.*]] = phi i32 [ [[VAL1]], %[[LOAD_V1]] ], [ 0, %[[CHECK_COND1]] ] |
| ; NO-VP-NEXT: br i1 [[C3]], label %[[LOAD_V2:.*]], label %[[LATCH]] |
| ; NO-VP: [[LOAD_V2]]: |
| ; NO-VP-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[IV]] |
| ; NO-VP-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2]], align 4 |
| ; NO-VP-NEXT: [[VAL3:%.*]] = add i32 [[V2]], [[VAL2]] |
| ; NO-VP-NEXT: br label %[[LATCH]] |
| ; NO-VP: [[LATCH]]: |
| ; NO-VP-NEXT: [[RESULT:%.*]] = phi i32 [ [[VAL3]], %[[LOAD_V2]] ], [ [[VAL2]], %[[LOAD_V2_CHECK]] ] |
| ; NO-VP-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] |
| ; NO-VP-NEXT: store i32 [[RESULT]], ptr [[OUT]], align 4 |
| ; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] |
| ; NO-VP-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; NO-VP: [[EXIT]]: |
| ; NO-VP-NEXT: ret void |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| ; Loop header: %iv is the element index, starting at 0 and advanced in %latch. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] |
| br i1 %c1, label %load.v0, label %check.cond1 |
| |
| check.cond1: |
| ; Only reached on the false edge of the %c1 branch, so on this path %cond1 |
| ; has the same value as %not.c2. |
| %not.c2 = xor i1 %c2, true |
| %cond1 = or i1 %c1, %not.c2 |
| br i1 %cond1, label %load.v1, label %load.v2.check |
| |
| load.v0: |
| ; Reached when %c1 is true: load src0[iv], then continue to %load.v1. |
| %gep0 = getelementptr inbounds i32, ptr %src0, i64 %iv |
| %v0 = load i32, ptr %gep0, align 4 |
| br label %load.v1 |
| |
| load.v1: |
| ; %val0 is %v0 when arriving from %load.v0 and 0 when arriving from %check.cond1. |
| %val0 = phi i32 [ %v0, %load.v0 ], [ 0, %check.cond1 ] |
| %gep1 = getelementptr inbounds i32, ptr %src1, i64 %iv |
| %v1 = load i32, ptr %gep1, align 4 |
| %val1 = add i32 %v1, %val0 |
| br label %load.v2.check |
| |
| load.v2.check: |
| ; %val2 is the sum from %load.v1, or 0 when %check.cond1 skipped both earlier loads. |
| %val2 = phi i32 [ %val1, %load.v1 ], [ 0, %check.cond1 ] |
| br i1 %c3, label %load.v2, label %latch |
| |
| load.v2: |
| ; Reached when %c3 is true: add src2[iv] to the running sum. |
| %gep2 = getelementptr inbounds i32, ptr %src2, i64 %iv |
| %v2 = load i32, ptr %gep2, align 4 |
| %val3 = add i32 %v2, %val2 |
| br label %latch |
| |
| latch: |
| ; Every path ends here: store the final sum to dst[iv], then advance %iv. |
| ; The loop exits once %iv.next equals %n. |
| %result = phi i32 [ %val3, %load.v2 ], [ %val2, %load.v2.check ] |
| %out = getelementptr inbounds i32, ptr %dst, i64 %iv |
| store i32 %result, ptr %out, align 4 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %exitcond = icmp eq i64 %iv.next, %n |
| br i1 %exitcond, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |