; blob: bea28d2834cfaadd459a42eabda48183f8770598 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter-out-after "middle.block:" --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-partial-aliasing-vectorization -force-target-supports-masked-memory-ops -tail-folding-policy=must-fold-tail -disable-output -vplan-print-after="attachAliasMaskToHeaderMask$" -S %s 2>&1 | FileCheck --check-prefix=INITIAL %s
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-partial-aliasing-vectorization -force-target-supports-masked-memory-ops -tail-folding-policy=must-fold-tail -disable-output -vplan-print-after="printFinalVPlan$" -S %s 2>&1 | FileCheck --check-prefix=FINAL %s
; Test input: a byte-wise loop that stores a[i] + b[i] into c[i].
; Only %a is declared noalias, so %b and %c may overlap; the checks below
; expect the alias mask to be computed from the addresses of %b and %c and
; to be and-ed into the header mask used by every masked load and store.
define void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
; INITIAL-LABEL: VPlan for loop in 'alias_mask'
; INITIAL: VPlan 'Initial VPlan for VF={4},UF>=1' {
; INITIAL-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; INITIAL-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; INITIAL-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; INITIAL-NEXT: Live-in vp<[[VP3:%[0-9]+]]> = backedge-taken count
; INITIAL-NEXT: Live-in ir<%n> = original trip-count
; INITIAL-EMPTY:
; INITIAL-NEXT: ir-bb<entry>:
; INITIAL-NEXT: Successor(s): scalar.ph, vector.ph
; INITIAL-EMPTY:
; INITIAL-NEXT: vector.ph:
; INITIAL-NEXT: EMIT vp<%incoming.alias.mask> = incoming-alias-mask
; INITIAL-NEXT: Successor(s): vector loop
; INITIAL-EMPTY:
; INITIAL-NEXT: <x1> vector loop: {
; INITIAL-NEXT: vp<[[VP4:%[0-9]+]]> = CANONICAL-IV
; INITIAL-EMPTY:
; INITIAL-NEXT: vector.body:
; INITIAL-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]>
; INITIAL-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION nuw vp<[[VP4]]>
; INITIAL-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = icmp ule vp<[[VP5]]>, vp<[[VP3]]>
; INITIAL-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = and vp<[[VP6]]>, vp<%incoming.alias.mask>
; INITIAL-NEXT: Successor(s): vector.body.split
; INITIAL-EMPTY:
; INITIAL-NEXT: vector.body.split:
; INITIAL-NEXT: CLONE ir<%ptr.a> = getelementptr inbounds ir<%a>, ir<%iv>
; INITIAL-NEXT: vp<[[VP8:%[0-9]+]]> = vector-pointer inbounds ir<%ptr.a>, ir<1>
; INITIAL-NEXT: WIDEN ir<%ld.a> = load vp<[[VP8]]>, vp<[[VP7]]>
; INITIAL-NEXT: CLONE ir<%ptr.b> = getelementptr inbounds ir<%b>, ir<%iv>
; INITIAL-NEXT: vp<[[VP9:%[0-9]+]]> = vector-pointer inbounds ir<%ptr.b>, ir<1>
; INITIAL-NEXT: WIDEN ir<%ld.b> = load vp<[[VP9]]>, vp<[[VP7]]>
; INITIAL-NEXT: WIDEN ir<%add> = add ir<%ld.b>, ir<%ld.a>
; INITIAL-NEXT: CLONE ir<%ptr.c> = getelementptr inbounds ir<%c>, ir<%iv>
; INITIAL-NEXT: vp<[[VP10:%[0-9]+]]> = vector-pointer inbounds ir<%ptr.c>, ir<1>
; INITIAL-NEXT: WIDEN store vp<[[VP10]]>, ir<%add>, vp<[[VP7]]>
; INITIAL-NEXT: CLONE ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
; INITIAL-NEXT: CLONE ir<%exitcond.not> = icmp eq ir<%iv.next>, ir<%n>
; INITIAL-NEXT: Successor(s): vector.latch
; INITIAL-EMPTY:
; INITIAL-NEXT: vector.latch:
; INITIAL-NEXT: EMIT vp<%index.next> = add vp<[[VP4]]>, vp<[[VP1]]>
; INITIAL-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; INITIAL-NEXT: No successors
; INITIAL-NEXT: }
; INITIAL-NEXT: Successor(s): middle.block
; INITIAL-EMPTY:
; INITIAL-NEXT: middle.block:
;
; FINAL-LABEL: VPlan for loop in 'alias_mask'
; FINAL: VPlan 'Final VPlan for VF={4},UF={1}' {
; FINAL-NEXT: Live-in ir<%n> = original trip-count
; FINAL-EMPTY:
; FINAL-NEXT: ir-bb<entry>:
; FINAL-NEXT: IR %b2 = ptrtoaddr ptr %b to i64
; FINAL-NEXT: IR %c1 = ptrtoaddr ptr %c to i64
; FINAL-NEXT: Successor(s): vector.clamped.vf.check
; FINAL-EMPTY:
; FINAL-NEXT: vector.clamped.vf.check:
; FINAL-NEXT: WIDEN-INTRINSIC vp<[[VP2:%[0-9]+]]> = call llvm.loop.dependence.war.mask(ir<%b2>, ir<%c1>, ir<1>)
; FINAL-NEXT: EMIT vp<%num.active.lanes> = num-active-lanes vp<[[VP2]]>
; FINAL-NEXT: EMIT vp<%vf.is.scalar> = icmp ule vp<%num.active.lanes>, ir<1>
; FINAL-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = sub ir<-1>, ir<%n>
; FINAL-NEXT: EMIT vp<%vf.step.overflow> = icmp ult vp<[[VP3]]>, vp<%num.active.lanes>
; FINAL-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = or vp<%vf.is.scalar>, vp<%vf.step.overflow>
; FINAL-NEXT: EMIT branch-on-cond vp<[[VP4]]>
; FINAL-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
; FINAL-EMPTY:
; FINAL-NEXT: vector.ph:
; FINAL-NEXT: EMIT vp<%trip.count.minus.1> = sub ir<%n>, ir<1>
; FINAL-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = sub vp<%num.active.lanes>, ir<1>
; FINAL-NEXT: EMIT vp<%n.rnd.up> = add ir<%n>, vp<[[VP6]]>
; FINAL-NEXT: EMIT vp<%n.mod.vf> = urem vp<%n.rnd.up>, vp<%num.active.lanes>
; FINAL-NEXT: EMIT vp<%n.vec> = sub vp<%n.rnd.up>, vp<%n.mod.vf>
; FINAL-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = broadcast vp<%trip.count.minus.1>
; FINAL-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = step-vector i64
; FINAL-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = broadcast vp<%num.active.lanes>
; FINAL-NEXT: Successor(s): vector.body
; FINAL-EMPTY:
; FINAL-NEXT: vector.body:
; FINAL-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
; FINAL-NEXT: WIDEN-PHI vp<[[VP10:%[0-9]+]]> = phi [ vp<[[VP8]]>, vector.ph ], [ vp<%vec.ind.next>, vector.body ]
; FINAL-NEXT: EMIT vp<[[VP11:%[0-9]+]]> = icmp ule vp<[[VP10]]>, vp<[[VP7]]>
; FINAL-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = and vp<[[VP11]]>, vp<[[VP2]]>
; FINAL-NEXT: CLONE ir<%ptr.a> = getelementptr inbounds ir<%a>, vp<%index>
; FINAL-NEXT: WIDEN ir<%ld.a> = load ir<%ptr.a>, vp<[[VP12]]>
; FINAL-NEXT: CLONE ir<%ptr.b> = getelementptr inbounds ir<%b>, vp<%index>
; FINAL-NEXT: WIDEN ir<%ld.b> = load ir<%ptr.b>, vp<[[VP12]]>
; FINAL-NEXT: WIDEN ir<%add> = add ir<%ld.b>, ir<%ld.a>
; FINAL-NEXT: CLONE ir<%ptr.c> = getelementptr inbounds ir<%c>, vp<%index>
; FINAL-NEXT: WIDEN store ir<%ptr.c>, ir<%add>, vp<[[VP12]]>
; FINAL-NEXT: EMIT vp<%index.next> = add vp<%index>, vp<%num.active.lanes>
; FINAL-NEXT: EMIT vp<%vec.ind.next> = add nuw vp<[[VP10]]>, vp<[[VP9]]>
; FINAL-NEXT: EMIT vp<[[VP13:%[0-9]+]]> = icmp eq vp<%index.next>, vp<%n.vec>
; FINAL-NEXT: EMIT branch-on-cond vp<[[VP13]]>
; FINAL-NEXT: Successor(s): middle.block, vector.body
; FINAL-EMPTY:
; FINAL-NEXT: middle.block:
;
entry:
br label %for.body
for.body:
; %iv starts at 0, advances by 1 per iteration, and indexes all three i8 arrays.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; Load a[iv] and b[iv] as single bytes.
%ptr.a = getelementptr inbounds i8, ptr %a, i64 %iv
%ld.a = load i8, ptr %ptr.a, align 1
%ptr.b = getelementptr inbounds i8, ptr %b, i64 %iv
%ld.b = load i8, ptr %ptr.b, align 1
%add = add i8 %ld.b, %ld.a
; Store the sum to c[iv]; %c is not noalias, so this write may overlap %b.
%ptr.c = getelementptr inbounds i8, ptr %c, i64 %iv
store i8 %add, ptr %ptr.c, align 1
; The exit test follows the body: the loop leaves once the incremented index equals %n.
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}