blob: 8c62ffd9c7a98f16c1aa8aaadcb9dcf1afe0da45 [file]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph\:" --version 5
; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: define void @trip7_i64(
; CHECK-SAME: ptr noalias noundef captures(none) [[DST:%.*]], ptr noalias noundef readonly captures(none) [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 7)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP5]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 1)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP6]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7)
; CHECK-NEXT: [[EXTRACT_FIRST_LANE_MASK:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[COND:%.*]] = xor i1 [[EXTRACT_FIRST_LANE_MASK]], true
; CHECK-NEXT: br i1 [[COND]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src = getelementptr inbounds i64, ptr %src, i64 %iv
%0 = load i64, ptr %gep.src, align 8
%mul = shl nsw i64 %0, 1
%gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv
%1 = load i64, ptr %gep.dst, align 8
%add = add nsw i64 %1, %mul
store i64 %add, ptr %gep.dst, align 8
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 7
br i1 %ec, label %exit, label %loop
exit:
ret void
}
define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: define void @trip5_i8(
; CHECK-SAME: ptr noalias noundef captures(none) [[DST:%.*]], ptr noalias noundef readonly captures(none) [[SRC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[GEP_DST]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[GEP_DST]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep.src = getelementptr inbounds i8, ptr %src, i64 %iv
%0 = load i8, ptr %gep.src, align 1
%mul = shl i8 %0, 1
%gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
%1 = load i8, ptr %gep.dst, align 1
%add = add i8 %mul, %1
store i8 %add, ptr %gep.dst, align 1
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, 5
br i1 %ec, label %exit, label %loop
exit:
ret void
}
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }