blob: 44940aff4869970746cd6ea19047797fe505bd35 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^vector.ph:" --version 5
; RUN: opt -S -force-partial-aliasing-vectorization -force-target-supports-masked-memory-ops -tail-folding-policy=must-fold-tail -force-vector-width=4 -passes=loop-vectorize %s | FileCheck %s
; Tests that the inputs to the loop dependence masks have freezes for forked pointers.
; Scalar loop under test. %iv starts at 0 and the loop exits once
; %iv + 1 == %n. Each iteration does:
;   %p   = load i32   from %predicates[%iv]
;   %src = (%p == 0) ? %base2 : %base1      ; forked pointer
;   store (load float from %src[%iv]) to %dest[%iv]
;
; Shape of the expected output up to vector.ph:
;   - all four pointer arguments are converted to i64 with ptrtoaddr;
;   - one loop.dependence.war.mask is built for each of %predicates, %base2
;     and %base1 against %dest, and the three masks are ANDed together;
;   - the operands of the %base2 and %base1 masks are frozen first, while the
;     %predicates mask consumes the ptrtoaddr values directly;
;   - the set lanes are counted (zext + vector.reduce.add), and the scalar
;     path is taken when that count is <= 1 or when (-1 - %n) is unsigned-less
;     than the count.
define void @alias_mask_forked_pointer_needs_freeze(ptr %base1, ptr %base2,
; CHECK-LABEL: define void @alias_mask_forked_pointer_needs_freeze(
; CHECK-SAME: ptr [[BASE1:%.*]], ptr [[BASE2:%.*]], ptr [[DEST:%.*]], ptr [[PREDICATES:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[PREDICATES4:%.*]] = ptrtoaddr ptr [[PREDICATES]] to i64
; CHECK-NEXT: [[BASE23:%.*]] = ptrtoaddr ptr [[BASE2]] to i64
; CHECK-NEXT: [[BASE12:%.*]] = ptrtoaddr ptr [[BASE1]] to i64
; CHECK-NEXT: [[DEST1:%.*]] = ptrtoaddr ptr [[DEST]] to i64
; CHECK-NEXT: br label %[[VECTOR_CLAMPED_VF_CHECK:.*]]
; CHECK: [[VECTOR_CLAMPED_VF_CHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[PREDICATES4]], i64 [[DEST1]], i64 4)
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[BASE23]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[DEST1]]
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[TMP1]], i64 [[TMP2]], i64 4)
; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i1> [[TMP0]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze i64 [[BASE12]]
; CHECK-NEXT: [[TMP6:%.*]] = freeze i64 [[DEST1]]
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[TMP5]], i64 [[TMP6]], i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i1> [[TMP8]] to <4 x i64>
; CHECK-NEXT: [[NUM_ACTIVE_LANES:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
; CHECK-NEXT: [[VF_IS_SCALAR:%.*]] = icmp ule i64 [[NUM_ACTIVE_LANES]], 1
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 -1, [[N]]
; CHECK-NEXT: [[VF_STEP_OVERFLOW:%.*]] = icmp ult i64 [[TMP11]], [[NUM_ACTIVE_LANES]]
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[VF_IS_SCALAR]], [[VF_STEP_OVERFLOW]]
; CHECK-NEXT: br i1 [[TMP12]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
;
ptr %dest, ptr %predicates,
i64 %n) {
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Per-iteration predicate: an i32 read from %predicates[%iv].
%p.gep = getelementptr inbounds i32, ptr %predicates, i64 %iv
%p = load i32, ptr %p.gep, align 4
%cmp = icmp eq i32 %p, 0
; Forked pointer: the load base is %base2 when %p == 0, otherwise %base1.
%src = select i1 %cmp, ptr %base2, ptr %base1
%src.gep = getelementptr inbounds float, ptr %src, i64 %iv
%src.val = load float, ptr %src.gep, align 4
; Copy the loaded float into %dest[%iv].
%dst.gep = getelementptr inbounds float, ptr %dest, i64 %iv
store float %src.val, ptr %dst.gep, align 4
; Advance the induction variable; leave the loop once it equals %n.
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, %n
br i1 %ec, label %exit, label %loop
exit:
ret void
}