| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt < %s -passes=loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilogue=predicate-dont-vectorize -enable-wide-lane-mask -S | FileCheck %s |
| |
| target triple = "thumbv8.1m.main-arm-unknown-eabihf" |
| |
| define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 { |
| ; CHECK-LABEL: define void @f0( |
| ; CHECK-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[VAL:%.*]] = icmp sgt i64 [[N]], 0 |
| ; CHECK-NEXT: br i1 [[VAL]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] |
| ; CHECK: [[FOR_BODY_PREHEADER]]: |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 31 |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 32 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 16 |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[N]]) |
| ; CHECK-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[TMP0]], i64 [[N]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr align 1 [[TMP1]], <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) |
| ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr align 1 [[TMP3]], <16 x i1> [[ACTIVE_LANE_MASK1]], <16 x i8> poison) |
| ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3) |
| ; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD2]], splat (i8 3) |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16 |
| ; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP4]], ptr align 1 [[TMP6]], <16 x i1> [[ACTIVE_LANE_MASK]]) |
| ; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP5]], ptr align 1 [[TMP8]], <16 x i1> [[ACTIVE_LANE_MASK1]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 |
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] |
| ; CHECK: [[FOR_END_LOOPEXIT]]: |
| ; CHECK-NEXT: br label %[[FOR_END]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %val = icmp sgt i64 %n, 0 |
| br i1 %val, label %for.body, label %for.end |
| |
| for.body: |
| %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] |
| %arrayidx = getelementptr inbounds i8, ptr %src, i64 %indvars.iv |
| %0 = load i8, ptr %arrayidx, align 1 |
| %mul = mul i8 %0, 3 |
| %arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv |
| store i8 %mul, ptr %arrayidx3, align 1 |
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |
| %exitcond.not = icmp eq i64 %indvars.iv.next, %n |
| br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 |
| |
| for.end: |
| ret void |
| } |
| |
| attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+mve.fp" } |
| |
| !1 = distinct !{!1, !2, !3} |
| !2 = !{!"llvm.loop.vectorize.width", i32 16} |
| !3 = !{!"llvm.loop.interleave.count", i32 2} |
| ;. |
| ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} |
| ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ;. |