| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^vector.ph:" --version 5 |
| ; RUN: opt -S -force-partial-aliasing-vectorization -force-target-supports-masked-memory-ops -tail-folding-policy=must-fold-tail -force-vector-width=4 -passes=loop-vectorize %s | FileCheck %s |
| |
| ; Tests that the inputs to the loop dependence masks are frozen for forked pointers. |
| ; Each iteration stores one float to %dest[%iv]. The float is loaded from |
| ; %base2[%iv] when predicates[%iv] == 0 and from %base1[%iv] otherwise, so the |
| ; load address is derived from a select between two base pointers. |
| ; The CHECK lines expect three llvm.loop.dependence.war.mask calls that are |
| ; and-ed together: the %predicates/%dest mask takes the addresses directly, |
| ; while the %base2/%dest and %base1/%dest masks take frozen copies of them. |
| define void @alias_mask_forked_pointer_needs_freeze(ptr %base1, ptr %base2, |
| ; CHECK-LABEL: define void @alias_mask_forked_pointer_needs_freeze( |
| ; CHECK-SAME: ptr [[BASE1:%.*]], ptr [[BASE2:%.*]], ptr [[DEST:%.*]], ptr [[PREDICATES:%.*]], i64 [[N:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[PREDICATES4:%.*]] = ptrtoaddr ptr [[PREDICATES]] to i64 |
| ; CHECK-NEXT: [[BASE23:%.*]] = ptrtoaddr ptr [[BASE2]] to i64 |
| ; CHECK-NEXT: [[BASE12:%.*]] = ptrtoaddr ptr [[BASE1]] to i64 |
| ; CHECK-NEXT: [[DEST1:%.*]] = ptrtoaddr ptr [[DEST]] to i64 |
| ; CHECK-NEXT: br label %[[VECTOR_CLAMPED_VF_CHECK:.*]] |
| ; CHECK: [[VECTOR_CLAMPED_VF_CHECK]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[PREDICATES4]], i64 [[DEST1]], i64 4) |
| ; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[BASE23]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[DEST1]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[TMP1]], i64 [[TMP2]], i64 4) |
| ; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i1> [[TMP0]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = freeze i64 [[BASE12]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = freeze i64 [[DEST1]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i64(i64 [[TMP5]], i64 [[TMP6]], i64 4) |
| ; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP4]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i1> [[TMP8]] to <4 x i64> |
| ; CHECK-NEXT: [[NUM_ACTIVE_LANES:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]]) |
| ; CHECK-NEXT: [[VF_IS_SCALAR:%.*]] = icmp ule i64 [[NUM_ACTIVE_LANES]], 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 -1, [[N]] |
| ; CHECK-NEXT: [[VF_STEP_OVERFLOW:%.*]] = icmp ult i64 [[TMP11]], [[NUM_ACTIVE_LANES]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[VF_IS_SCALAR]], [[VF_STEP_OVERFLOW]] |
| ; CHECK-NEXT: br i1 [[TMP12]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; |
| ptr %dest, ptr %predicates, |
| i64 %n) { |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; Load the i32 predicate for this iteration and test it against zero. |
| %p.gep = getelementptr inbounds i32, ptr %predicates, i64 %iv |
| %p = load i32, ptr %p.gep, align 4 |
| %cmp = icmp eq i32 %p, 0 |
| ; Forked pointer: the source base is %base2 when the predicate is zero, |
| ; otherwise %base1. |
| %src = select i1 %cmp, ptr %base2, ptr %base1 |
| ; Load the float at index %iv from the selected base and store it to %dest[%iv]. |
| %src.gep = getelementptr inbounds float, ptr %src, i64 %iv |
| %src.val = load float, ptr %src.gep, align 4 |
| %dst.gep = getelementptr inbounds float, ptr %dest, i64 %iv |
| store float %src.val, ptr %dst.gep, align 4 |
| ; Advance the induction variable; leave the loop once %iv.next equals %n. |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %ec = icmp eq i64 %iv.next, %n |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |