| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -p loop-vectorize -S %s | FileCheck %s |
| |
| ; Data layout and target triple (AArch64 Linux) applied to every function in this file. |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" |
| target triple = "aarch64-unknown-linux" |
| |
| ; Test case from https://github.com/llvm/llvm-project/issues/148431. |
| ; |
| ; Input loop: every iteration loads an i8 from the loop-invariant pointer %src, |
| ; zero-extends it to i64, ORs it with 1 and stores the result to |
| ; %dst.1[zext(%iv)][%off] (rows of [16 x i64]); it also stores i8 0 to %dst.2. |
| ; The induction variable is an i8 that advances by 4, and the latch carries the |
| ; loop metadata !0. |
| ; |
| ; Expected output (see the autogenerated assertions below): a SCEV check block, |
| ; a memory-overlap check block, and a VF=4 vector loop driven by |
| ; llvm.get.active.lane.mask. The load from %src stays a single scalar load that |
| ; is broadcast, while the stores to %dst.1 are emitted per lane in |
| ; pred.store.if blocks. The original scalar loop is kept as the fallback. |
| define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 %n, i64 %off) #0 { |
| ; CHECK-LABEL: define void @test_predicated_load_cast_hint( |
| ; CHECK-SAME: ptr [[DST_1:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]], i8 [[N:%.*]], i64 [[OFF:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[N_EXT:%.*]] = sext i8 [[N]] to i32 |
| ; CHECK-NEXT: [[N_SUB:%.*]] = add i32 [[N_EXT]], -15 |
| ; CHECK-NEXT: [[SMAX16:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4) |
| ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX16]], -1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 |
| ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 |
| ; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] |
| ; CHECK: [[VECTOR_SCEVCHECK]]: |
| ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4) |
| ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX]], -1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 |
| ; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 4, i8 [[TMP5]]) |
| ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 |
| ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = add i8 4, [[MUL_RESULT]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 4 |
| ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[TMP4]], 255 |
| ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[OFF]], 3 |
| ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP11]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 512, i64 [[TMP12]]) |
| ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 |
| ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]] |
| ; CHECK-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] |
| ; CHECK: [[VECTOR_MEMCHECK]]: |
| ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST_2]], i64 1 |
| ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 1 |
| ; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[OFF]], 3 |
| ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP18]] |
| ; CHECK-NEXT: [[SMAX7:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4) |
| ; CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[SMAX7]], -1 |
| ; CHECK-NEXT: [[TMP20:%.*]] = zext nneg i32 [[TMP19]] to i64 |
| ; CHECK-NEXT: [[TMP21:%.*]] = lshr i64 [[TMP20]], 2 |
| ; CHECK-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[TMP21]], 9 |
| ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP18]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP23]], 8 |
| ; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP24]] |
| ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP5]] |
| ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP4]] |
| ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] |
| ; CHECK-NEXT: [[BOUND09:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP8]] |
| ; CHECK-NEXT: [[BOUND110:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP4]] |
| ; CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]] |
| ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] |
| ; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP8]] |
| ; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP5]] |
| ; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] |
| ; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]] |
| ; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]]) |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ] |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ] |
| ; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] |
| ; CHECK: [[PRED_STORE_IF]]: |
| ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i64> [[TMP26]], i32 0 |
| ; CHECK-NEXT: [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]] |
| ; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0 |
| ; CHECK-NEXT: [[TMP105:%.*]] = or i64 [[TMP104]], 1 |
| ; CHECK-NEXT: store i64 [[TMP105]], ptr [[TMP103]], align 8, !alias.scope [[META3]] |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] |
| ; CHECK: [[PRED_STORE_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] |
| ; CHECK: [[PRED_STORE_IF17]]: |
| ; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1 |
| ; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]] |
| ; CHECK-NEXT: [[TMP110:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1 |
| ; CHECK-NEXT: [[TMP111:%.*]] = or i64 [[TMP110]], 1 |
| ; CHECK-NEXT: store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]] |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]] |
| ; CHECK: [[PRED_STORE_CONTINUE18]]: |
| ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] |
| ; CHECK: [[PRED_STORE_IF19]]: |
| ; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2 |
| ; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]] |
| ; CHECK-NEXT: [[TMP116:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2 |
| ; CHECK-NEXT: [[TMP117:%.*]] = or i64 [[TMP116]], 1 |
| ; CHECK-NEXT: store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]] |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] |
| ; CHECK: [[PRED_STORE_CONTINUE20]]: |
| ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]] |
| ; CHECK: [[PRED_STORE_IF21]]: |
| ; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3 |
| ; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]] |
| ; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3 |
| ; CHECK-NEXT: [[TMP123:%.*]] = or i64 [[TMP122]], 1 |
| ; CHECK-NEXT: store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]] |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] |
| ; CHECK: [[PRED_STORE_CONTINUE22]]: |
| ; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]]) |
| ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 |
| ; CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP47]], true |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 16) |
| ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[EXIT:.*]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[SRC]], align 1 |
| ; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i64 |
| ; CHECK-NEXT: [[ADD:%.*]] = or i64 [[L_EXT]], 1 |
| ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 |
| ; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[IV_EXT]], i64 [[OFF]] |
| ; CHECK-NEXT: store i64 [[ADD]], ptr [[GEP_DST_1]], align 8 |
| ; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1 |
| ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 4 |
| ; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = zext i8 [[IV_NEXT]] to i32 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N_SUB]], [[IV_NEXT_EXT]] |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Loop bound: sign-extended %n minus 15, computed once outside the loop. |
| %n.ext = sext i8 %n to i32 |
| %n.sub = add i32 %n.ext, -15 |
| br label %loop |
| |
| loop: |
| %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] |
| ; The load address %src does not depend on %iv; the loaded value is widened to i64. |
| %l = load i8, ptr %src, align 1 |
| %l.ext = zext i8 %l to i64 |
| %add = or i64 %l.ext, 1 |
| %iv.ext = zext i8 %iv to i64 |
| ; Each iteration addresses row %iv.ext of [16 x i64] at column %off. |
| %gep.dst.1 = getelementptr [16 x i64], ptr %dst.1, i64 %iv.ext, i64 %off |
| store i64 %add, ptr %gep.dst.1, align 8 |
| store i8 0, ptr %dst.2, align 1 |
| ; The i8 increment carries no nuw/nsw flags; the expected output guards the |
| ; vector loop with a runtime overflow check in the SCEV check block. |
| %iv.next = add i8 %iv, 4 |
| %iv.next.ext = zext i8 %iv.next to i32 |
| %cmp = icmp sgt i32 %n.sub, %iv.next.ext |
| br i1 %cmp, label %loop, label %exit, !llvm.loop !0 |
| |
| exit: |
| ret void |
| } |
| |
| ; Check computing costs for srem/sdiv with invariant divisor and tail folding. |
| ; From https://github.com/llvm/llvm-project/issues/160354. |
| ; |
| ; Input loop: an unconditional srem by %d.0 in the header decides whether the |
| ; 'then' block runs; 'then' performs an sdiv by %d.1 and stores %iv to %dst at |
| ; the resulting index. Both divisors are function arguments. |
| ; |
| ; Expected output (see the autogenerated assertions below): the loop is left in |
| ; its original scalar form. |
| ; NOTE(review): no loop metadata in this file requests tail folding for this |
| ; loop; presumably it comes from the target-cpu of attribute group #0 - confirm. |
| define void @srem_sdiv_with_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %end) #0 { |
| ; CHECK-LABEL: define void @srem_sdiv_with_tail_folding( |
| ; CHECK-SAME: i32 [[D_0:%.*]], i32 [[D_1:%.*]], ptr [[DST:%.*]], i32 [[END:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[IV_SUB:%.*]] = add nsw i32 [[IV]], -1 |
| ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[IV_SUB]], [[D_0]] |
| ; CHECK-NEXT: [[REM_1:%.*]] = add nsw i32 [[REM]], 1 |
| ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[REM_1]], [[D_0]] |
| ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[THEN]]: |
| ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[IV_SUB]], [[D_1]] |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[DIV]], 1 |
| ; CHECK-NEXT: [[ADD_1_EXT:%.*]] = sext i32 [[ADD_1]] to i64 |
| ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[ADD_1_EXT]] |
| ; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_DST]], align 4 |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 |
| ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[END]] |
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %iv.sub = add nsw i32 %iv, -1 |
| ; The remainder by the invariant %d.0 is computed on every iteration. |
| %rem = srem i32 %iv.sub, %d.0 |
| %rem.1 = add nsw i32 %rem, 1 |
| %c = icmp eq i32 %rem.1, %d.0 |
| br i1 %c, label %then, label %loop.latch |
| |
| then: |
| ; The division by the invariant %d.1 only executes when %c is true. |
| %div = sdiv i32 %iv.sub, %d.1 |
| %add.1 = add i32 %div, 1 |
| %add.1.ext = sext i32 %add.1 to i64 |
| %gep.dst = getelementptr i32, ptr %dst, i64 %add.1.ext |
| store i32 %iv, ptr %gep.dst, align 4 |
| br label %loop.latch |
| |
| loop.latch: |
| %iv.next = add nuw nsw i32 %iv, 1 |
| %ec = icmp ne i32 %iv.next, %end |
| br i1 %ec, label %loop.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Check computing costs for srem and predicated sdiv with invariant divisor without tail folding. |
| ; From https://github.com/llvm/llvm-project/issues/160356. |
| ; |
| ; The loop body is identical to @srem_sdiv_with_tail_folding; only the attribute |
| ; group differs (#1 instead of #0). |
| ; |
| ; Expected output (see the autogenerated assertions below): the loop is |
| ; vectorized with VF=4 and a scalar remainder loop. The srem is widened to |
| ; <4 x i32>, whereas the sdiv and the store are emitted per lane in |
| ; pred.sdiv.if / pred.store.if blocks guarded by the compare result. |
| define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %end) #1 { |
| ; CHECK-LABEL: define void @srem_sdiv_without_tail_folding( |
| ; CHECK-SAME: i32 [[D_0:%.*]], i32 [[D_1:%.*]], ptr [[DST:%.*]], i32 [[END:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[END]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[END]], 4 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[END]], [[N_MOD_VF]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[D_0]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -1) |
| ; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> [[TMP0]], [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], splat (i32 1) |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] |
| ; CHECK: [[PRED_SDIV_IF]]: |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = sdiv i32 [[TMP5]], [[D_1]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 |
| ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] |
| ; CHECK: [[PRED_SDIV_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_SDIV_IF]] ] |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] |
| ; CHECK: [[PRED_SDIV_IF1]]: |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = sdiv i32 [[TMP10]], [[D_1]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 |
| ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] |
| ; CHECK: [[PRED_SDIV_CONTINUE2]]: |
| ; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP12]], %[[PRED_SDIV_IF1]] ] |
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]] |
| ; CHECK: [[PRED_SDIV_IF3]]: |
| ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 |
| ; CHECK-NEXT: [[TMP16:%.*]] = sdiv i32 [[TMP15]], [[D_1]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 |
| ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]] |
| ; CHECK: [[PRED_SDIV_CONTINUE4]]: |
| ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_SDIV_CONTINUE2]] ], [ [[TMP17]], %[[PRED_SDIV_IF3]] ] |
| ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6:.*]] |
| ; CHECK: [[PRED_SDIV_IF5]]: |
| ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 |
| ; CHECK-NEXT: [[TMP21:%.*]] = sdiv i32 [[TMP20]], [[D_1]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 |
| ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]] |
| ; CHECK: [[PRED_SDIV_CONTINUE6]]: |
| ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], %[[PRED_SDIV_CONTINUE4]] ], [ [[TMP22]], %[[PRED_SDIV_IF5]] ] |
| ; CHECK-NEXT: [[TMP24:%.*]] = add <4 x i32> [[TMP23]], splat (i32 1) |
| ; CHECK-NEXT: [[TMP25:%.*]] = sext <4 x i32> [[TMP24]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] |
| ; CHECK: [[PRED_STORE_IF]]: |
| ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0 |
| ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]] |
| ; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 0 |
| ; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] |
| ; CHECK: [[PRED_STORE_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] |
| ; CHECK: [[PRED_STORE_IF7]]: |
| ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1 |
| ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]] |
| ; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 1 |
| ; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] |
| ; CHECK: [[PRED_STORE_CONTINUE8]]: |
| ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 |
| ; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] |
| ; CHECK: [[PRED_STORE_IF9]]: |
| ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2 |
| ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP35]] |
| ; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 2 |
| ; CHECK-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 4 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] |
| ; CHECK: [[PRED_STORE_CONTINUE10]]: |
| ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 |
| ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] |
| ; CHECK: [[PRED_STORE_IF11]]: |
| ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3 |
| ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP39]] |
| ; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 3 |
| ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] |
| ; CHECK: [[PRED_STORE_CONTINUE12]]: |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) |
| ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[END]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[IV_SUB:%.*]] = add nsw i32 [[IV]], -1 |
| ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[IV_SUB]], [[D_0]] |
| ; CHECK-NEXT: [[REM_1:%.*]] = add nsw i32 [[REM]], 1 |
| ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[REM_1]], [[D_0]] |
| ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[THEN]]: |
| ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[IV_SUB]], [[D_1]] |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[DIV]], 1 |
| ; CHECK-NEXT: [[ADD_1_EXT:%.*]] = sext i32 [[ADD_1]] to i64 |
| ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[ADD_1_EXT]] |
| ; CHECK-NEXT: store i32 [[IV]], ptr [[GEP_DST]], align 4 |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 |
| ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[END]] |
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP14:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %iv.sub = add nsw i32 %iv, -1 |
| ; The remainder by the invariant %d.0 is computed on every iteration. |
| %rem = srem i32 %iv.sub, %d.0 |
| %rem.1 = add nsw i32 %rem, 1 |
| %c = icmp eq i32 %rem.1, %d.0 |
| br i1 %c, label %then, label %loop.latch |
| |
| then: |
| ; The division by the invariant %d.1 only executes when %c is true. |
| %div = sdiv i32 %iv.sub, %d.1 |
| %add.1 = add i32 %div, 1 |
| %add.1.ext = sext i32 %add.1 to i64 |
| %gep.dst = getelementptr i32, ptr %dst, i64 %add.1.ext |
| store i32 %iv, ptr %gep.dst, align 4 |
| br label %loop.latch |
| |
| loop.latch: |
| %iv.next = add nuw nsw i32 %iv, 1 |
| %ec = icmp ne i32 %iv.next, %end |
| br i1 %ec, label %loop.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Attribute group #0 is used by @test_predicated_load_cast_hint and |
| ; @srem_sdiv_with_tail_folding; #1 is used by @srem_sdiv_without_tail_folding. |
| attributes #0 = { "target-cpu"="neoverse-v1" } |
| attributes #1 = { "target-cpu"="neoverse-v2" } |
| |
| ; Loop metadata !0 is attached to the latch branch of |
| ; @test_predicated_load_cast_hint only. It combines mustprogress with explicit |
| ; requests to vectorize the loop and to enable predication. |
| !0 = distinct !{!0, !1, !2, !3} |
| !1 = !{!"llvm.loop.mustprogress"} |
| !2 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} |
| !3 = !{!"llvm.loop.vectorize.enable", i1 true} |
| ;. |
| ; CHECK: [[META0]] = !{[[META1:![0-9]+]]} |
| ; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} |
| ; CHECK: [[META2]] = distinct !{[[META2]], !"LVerDomain"} |
| ; CHECK: [[META3]] = !{[[META4:![0-9]+]]} |
| ; CHECK: [[META4]] = distinct !{[[META4]], [[META2]]} |
| ; CHECK: [[META5]] = !{[[META6:![0-9]+]]} |
| ; CHECK: [[META6]] = distinct !{[[META6]], [[META2]]} |
| ; CHECK: [[META7]] = !{[[META1]], [[META4]]} |
| ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]} |
| ; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"} |
| ; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]} |
| ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META10]], [[META11]]} |
| ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META11]], [[META10]]} |
| ;. |