| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^vector.ph:" --version 5 |
| ; RUN: opt -S -force-partial-aliasing-vectorization -force-target-supports-masked-memory-ops -tail-folding-policy=must-fold-tail -force-vector-width=4 -passes=loop-vectorize %s | FileCheck %s |
| |
| target datalayout = "p0:64:64:64:16" |
| |
| ; Tests that we correctly extend "NUM_ACTIVE_LANES" when the GEP index type |
| ; (i16 with this datalayout) is narrower than the IV type (i64). |
| define void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) { |
| ; CHECK-LABEL: define void @alias_mask( |
| ; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[B2:%.*]] = ptrtoaddr ptr [[B]] to i16 |
| ; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i16 |
| ; CHECK-NEXT: br label %[[VECTOR_CLAMPED_VF_CHECK:.*]] |
| ; CHECK: [[VECTOR_CLAMPED_VF_CHECK]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1.i16(i16 [[B2]], i16 [[C1]], i16 1) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[TMP0]] to <4 x i16> |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) |
| ; CHECK-NEXT: [[NUM_ACTIVE_LANES:%.*]] = zext i16 [[TMP2]] to i64 |
| ; CHECK-NEXT: [[VF_IS_SCALAR:%.*]] = icmp ule i64 [[NUM_ACTIVE_LANES]], 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 -1, [[N]] |
| ; CHECK-NEXT: [[VF_STEP_OVERFLOW:%.*]] = icmp ult i64 [[TMP5]], [[NUM_ACTIVE_LANES]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[VF_IS_SCALAR]], [[VF_STEP_OVERFLOW]] |
| ; CHECK-NEXT: br i1 [[TMP6]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; |
| ; Scalar input loop: for each i64 %iv starting at 0, store the i8 sum |
| ; a[iv] + b[iv] to c[iv]. Only %a is marked noalias; %b and %c carry no |
| ; aliasing guarantee. |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] |
| %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv |
| %load.a = load i8, ptr %gep.a, align 1 |
| %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv |
| %load.b = load i8, ptr %gep.b, align 1 |
| %add = add i8 %load.b, %load.a |
| %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv |
| ; Store through %c, which is not known to be disjoint from the %b load above. |
| store i8 %add, ptr %gep.c, align 1 |
| ; The exit test compares the incremented IV against %n after the body has run, |
| ; so the body executes at least once. |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %exitcond.not = icmp eq i64 %iv.next, %n |
| br i1 %exitcond.not, label %exit, label %for.body |
| |
| exit: |
| ret void |
| } |