| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^middle.block:" --version 4 |
| ; RUN: opt -S --passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -enable-wide-lane-mask -force-vector-interleave=1 < %s | FileCheck %s -check-prefix CHECK-UF1 |
| ; RUN: opt -S --passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -enable-wide-lane-mask -force-vector-interleave=4 < %s | FileCheck %s -check-prefix CHECK-UF4 |
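
; These tests check vectorization with a wide active lane mask. With
; -enable-wide-lane-mask and an interleave count of 4, a single wide
; llvm.get.active.lane.mask call (e.g. <vscale x 64 x i1>) is expected in the
; loop, split into per-part masks via llvm.vector.extract, rather than four
; independent <vscale x 16 x i1> lane-mask calls. With an interleave count of
; 1 the mask width should match the VF unchanged.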
| |
| target triple = "aarch64-unknown-linux" |
| |
| define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, i64 %n) #0 { |
| ; CHECK-UF1-LABEL: define void @scalable_wide_active_lane_mask( |
| ; CHECK-UF1-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-UF1-NEXT: entry: |
| ; CHECK-UF1-NEXT: br label [[VECTOR_PH1:%.*]] |
| ; CHECK-UF1: vector.ph: |
| ; CHECK-UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 16 |
| ; CHECK-UF1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF1-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4 |
| ; CHECK-UF1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP18]] |
| ; CHECK-UF1-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP18]] |
| ; CHECK-UF1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[N]]) |
| ; CHECK-UF1-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK-UF1: vector.body: |
| ; CHECK-UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH1]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] |
| ; CHECK-UF1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison) |
| ; CHECK-UF1-NEXT: [[TMP6:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3) |
| ; CHECK-UF1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP6]], ptr [[TMP13]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) |
| ; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) |
| ; CHECK-UF1-NEXT: [[TMP14:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 |
| ; CHECK-UF1-NEXT: [[TMP11:%.*]] = xor i1 [[TMP14]], true |
| ; CHECK-UF1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK-UF1: middle.block: |
| ; |
| ; CHECK-UF4-LABEL: define void @scalable_wide_active_lane_mask( |
| ; CHECK-UF4-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-UF4-NEXT: entry: |
| ; CHECK-UF4-NEXT: br label [[VECTOR_PH1:%.*]] |
| ; CHECK-UF4: vector.ph: |
| ; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 64 |
| ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6 |
| ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]] |
| ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]] |
| ; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 |
| ; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] |
| ; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] |
| ; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48) |
| ; CHECK-UF4-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32) |
| ; CHECK-UF4-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16) |
| ; CHECK-UF4-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK-UF4: vector.body: |
| ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[TMP16]], [[VECTOR_PH1]] ], [ [[TMP55:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 16 x i1> [ [[TMP17]], [[VECTOR_PH1]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 16 x i1> [ [[TMP18]], [[VECTOR_PH1]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] |
| ; CHECK-UF4-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4 |
| ; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP23]] |
| ; CHECK-UF4-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 5 |
| ; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]] |
| ; CHECK-UF4-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP34]], 48 |
| ; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP29]] |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP20]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP24]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 16 x i8> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP33]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 16 x i8> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP30]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 16 x i8> poison) |
| ; CHECK-UF4-NEXT: [[TMP25:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3) |
| ; CHECK-UF4-NEXT: [[TMP26:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD9]], splat (i8 3) |
| ; CHECK-UF4-NEXT: [[TMP27:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD10]], splat (i8 3) |
| ; CHECK-UF4-NEXT: [[TMP28:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD11]], splat (i8 3) |
| ; CHECK-UF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-UF4-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 4 |
| ; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP38]] |
| ; CHECK-UF4-NEXT: [[TMP40:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP41:%.*]] = shl nuw i64 [[TMP40]], 5 |
| ; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP41]] |
| ; CHECK-UF4-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], 48 |
| ; CHECK-UF4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP44]] |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP25]], ptr [[TMP35]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr [[TMP39]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK6]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]]) |
| ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] |
| ; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4 |
| ; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] |
| ; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5 |
| ; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]] |
| ; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48 |
| ; CHECK-UF4-NEXT: [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]]) |
| ; CHECK-UF4-NEXT: [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48) |
| ; CHECK-UF4-NEXT: [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32) |
| ; CHECK-UF4-NEXT: [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16) |
| ; CHECK-UF4-NEXT: [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]]) |
| ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0 |
| ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true |
| ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK-UF4: middle.block: |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] |
| %arrayidx1 = getelementptr inbounds i8, ptr %src, i64 %iv |
| %ld = load i8, ptr %arrayidx1 |
| %mul = mul i8 %ld, 3 |
| %arrayidx2 = getelementptr inbounds i8, ptr %dst, i64 %iv |
| store i8 %mul, ptr %arrayidx2 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %iv.next, %n |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |
| |
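; Same pattern as above, but with double elements, checking that the wide
; lane mask is also formed for smaller element counts per vector: at UF4 a
; <vscale x 8 x i1> mask should be split into four <vscale x 2 x i1> parts.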
| define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonly %src, i64 %n) #0 { |
| ; CHECK-UF1-LABEL: define void @scalable_wide_active_lane_mask_double( |
| ; CHECK-UF1-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-UF1-NEXT: entry: |
| ; CHECK-UF1-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[N]], 0 |
| ; CHECK-UF1-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] |
| ; CHECK-UF1: for.body.preheader: |
| ; CHECK-UF1-NEXT: br label [[VECTOR_PH:%.*]] |
| ; CHECK-UF1: vector.ph: |
| ; CHECK-UF1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP12]], 2 |
| ; CHECK-UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF1-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP2]], 1 |
| ; CHECK-UF1-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[TMP9]] |
| ; CHECK-UF1-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[N]], [[TMP9]] |
| ; CHECK-UF1-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0 |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) |
| ; CHECK-UF1-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK-UF1: vector.body: |
| ; CHECK-UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]] |
| ; CHECK-UF1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison) |
| ; CHECK-UF1-NEXT: [[TMP3:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00) |
| ; CHECK-UF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], ptr [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) |
| ; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] |
| ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP13]]) |
| ; CHECK-UF1-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 |
| ; CHECK-UF1-NEXT: [[TMP6:%.*]] = xor i1 [[TMP7]], true |
| ; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK-UF1: middle.block: |
| ; |
| ; CHECK-UF4-LABEL: define void @scalable_wide_active_lane_mask_double( |
| ; CHECK-UF4-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-UF4-NEXT: entry: |
| ; CHECK-UF4-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[N]], 0 |
| ; CHECK-UF4-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] |
| ; CHECK-UF4: for.body.preheader: |
| ; CHECK-UF4-NEXT: br label [[VECTOR_PH:%.*]] |
| ; CHECK-UF4: vector.ph: |
| ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 |
| ; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3 |
| ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]] |
| ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]] |
| ; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0 |
| ; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] |
| ; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] |
| ; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6 |
| ; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6) |
| ; CHECK-UF4-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) |
| ; CHECK-UF4-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2) |
| ; CHECK-UF4-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) |
| ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] |
| ; CHECK-UF4: vector.body: |
| ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[TMP11]], [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK6:%.*]] = phi <vscale x 2 x i1> [ [[TMP12]], [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 2 x i1> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 2 x i1> [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ] |
| ; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]] |
| ; CHECK-UF4-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 1 |
| ; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP28]] |
| ; CHECK-UF4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2 |
| ; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]] |
| ; CHECK-UF4-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 6 |
| ; CHECK-UF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP24]] |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP29]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 2 x double> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP22]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 2 x double> poison) |
| ; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 2 x double> poison) |
| ; CHECK-UF4-NEXT: [[TMP16:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00) |
| ; CHECK-UF4-NEXT: [[TMP17:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD9]], splat (double 3.000000e+00) |
| ; CHECK-UF4-NEXT: [[TMP18:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD10]], splat (double 3.000000e+00) |
| ; CHECK-UF4-NEXT: [[TMP19:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD11]], splat (double 3.000000e+00) |
| ; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] |
| ; CHECK-UF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1 |
| ; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP33]] |
| ; CHECK-UF4-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP36:%.*]] = shl nuw i64 [[TMP35]], 2 |
| ; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP36]] |
| ; CHECK-UF4-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP39:%.*]] = mul nuw i64 [[TMP38]], 6 |
| ; CHECK-UF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP39]] |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP17]], ptr [[TMP34]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK6]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]]) |
| ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]]) |
| ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] |
| ; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1 |
| ; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] |
| ; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2 |
| ; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] |
| ; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() |
| ; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6 |
| ; CHECK-UF4-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]] |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]]) |
| ; CHECK-UF4-NEXT: [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6) |
| ; CHECK-UF4-NEXT: [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) |
| ; CHECK-UF4-NEXT: [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2) |
| ; CHECK-UF4-NEXT: [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]]) |
| ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]]) |
| ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0 |
| ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true |
| ; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK-UF4: middle.block: |
| ; |
| entry: |
| %cmp6 = icmp sgt i64 %n, 0 |
| br i1 %cmp6, label %for.body, label %for.end |
| |
| for.body: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] |
| %arrayidx1 = getelementptr inbounds double, ptr %src, i64 %iv |
| %ld = load double, ptr %arrayidx1 |
| %mul = fmul double %ld, 3.000000e+00 |
| %arrayidx2 = getelementptr inbounds double, ptr %dst, i64 %iv |
| store double %mul, ptr %arrayidx2 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %iv.next, %n |
| br i1 %exitcond.not, label %for.end, label %for.body |
| |
| for.end: |
| ret void |
| } |
| |
| attributes #0 = { nounwind vscale_range(1,16) "target-features"="+sve2p1" } |
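; The +sve2p1 feature above is assumed to be what makes the wide lane mask
; worthwhile here, since SVE2.1 provides while instructions that produce a
; pair of predicates.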
| |
| ;. |
| ; CHECK-UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} |
| ; CHECK-UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK-UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK-UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} |
| ;. |
| ; CHECK-UF4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} |
| ; CHECK-UF4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK-UF4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK-UF4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} |
| ;. |