blob: 62fe6629916fd013ffed4656bbf2b2a3250ff15c [file]
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -p loop-vectorize -force-vector-width=4 -disable-output \
; RUN: -vplan-print-after=scalarizeMemOpsWithIrregularTypes \
; RUN: -enable-mem-access-versioning=false 2>&1 | FileCheck %s
; Baseline case: each iteration loads an i64 from %p at element index
; `%iv * %stride` (i64-typed GEP) and stores it to `%p.out[%iv]`. %stride is a
; function argument; %iv starts at 0 and the original trip count is 128.
; NOTE(review): the expected dump below prints `icmp sge` for %exitcond followed
; by a `not` of it, whereas the IR compare is `icmp slt`. Presumably this is
; what the tool emits; confirm by regenerating the assertions.
define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'basic'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; The load index is the induction variable scaled by the runtime %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next < 128 (signed).
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Same loop body as @basic (i64 load at element index `%iv * %stride`, store to
; `%p.out[%iv]`), but the function carries attribute group #0, i.e. `optsize`.
define void @basic_optsize(ptr noalias %p.out, ptr %p, i64 %stride) #0 {
; CHECK-LABEL: VPlan for loop in 'basic_optsize'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; The load index is the induction variable scaled by the runtime %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next < 128 (signed).
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Attribute group referenced by @basic_optsize.
attributes #0 = { optsize }
; Same loop body as @basic (i64 load at element index `%iv * %stride`, store to
; `%p.out[%iv]`), but the function carries attribute group #1, i.e. `minsize`.
define void @basic_minsize(ptr noalias %p.out, ptr %p, i64 %stride) #1 {
; CHECK-LABEL: VPlan for loop in 'basic_minsize'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; The load index is the induction variable scaled by the runtime %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next < 128 (signed).
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Attribute group referenced by @basic_minsize.
attributes #1 = { minsize }
; When a byte GEP (i8 element type) feeds a wider memory operation, a
; unit-stride access corresponds to the GEP index advancing by the size of the
; accessed type rather than by one. In this test the index is
; `%iv * (%stride * 8)`, where the constant multiplier 8 equals the size in
; bytes of the loaded i64, so `%stride == 1` would result in a unit-strided
; load.
define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_scaled_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<8>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 8
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Scale the runtime stride by 8, the size in bytes of the loaded i64.
%stride.x8 = mul i64 %stride, 8
%idx = mul i64 %iv, %stride.x8
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Similar to @byte_gep_scaled_stride, but the constant multiplier (4) is
; smaller than the size in bytes of the loaded i64 (8), so unit-strideness
; would require `%stride == 2`.
; Note: the scaled value is still named %stride.x8 even though the multiplier
; in this function is 4.
define void @byte_gep_under_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_under_scaled_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<4>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 4
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Multiplier 4 is half the size in bytes of the loaded i64.
%stride.x8 = mul i64 %stride, 4
%idx = mul i64 %iv, %stride.x8
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Another variation of a constant multiplier combined with a byte GEP. This
; time the multiplier (16) is bigger than the size in bytes of the loaded i64
; (8), so this access cannot be speculated for unit-strideness.
; Note: the scaled value is still named %stride.x8 even though the multiplier
; in this function is 16.
define void @byte_gep_over_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_over_scaled_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<16>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 16
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Multiplier 16 is twice the size in bytes of the loaded i64.
%stride.x8 = mul i64 %stride, 16
%idx = mul i64 %iv, %stride.x8
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Yet another byte-GEP variation, this time with a constant multiplier that is
; not a power of two (11). Cannot be speculated for unit-strideness.
; Note: the scaled value is still named %stride.x8 even though the multiplier
; in this function is 11.
define void @byte_gep_non_power_of_two_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_non_power_of_two_scaled_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<11>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 11
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Non-power-of-two constant multiplier applied to the runtime stride.
%stride.x8 = mul i64 %stride, 11
%idx = mul i64 %iv, %stride.x8
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Byte GEP with no constant multiplier: the byte offset is `%iv * %stride`
; directly, so unit-strideness requires speculating
; `%stride == sizeof(load-access-type)`, i.e. 8 for the i64 load used here.
define void @byte_gep_nonscaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_nonscaled_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; The raw runtime stride is used as the per-iteration byte step.
%idx = mul i64 %iv, %stride
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Here %stride is negated (`0 - %stride`) before it is used to form the byte
; offset of the load. A negated stride might be a good heuristic for avoiding
; stride speculation. At the very least, keep this test for documentation
; purposes.
define void @byte_gep_negated_stride(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_gep_negated_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride.neg> = sub ir<0>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.neg>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride.neg = sub i64 0, %stride
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.neg
; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Negate the runtime stride before it is used in the index computation.
%stride.neg = sub i64 0, %stride
%idx = mul i64 %iv, %stride.neg
; i8-typed GEP: %idx is used as a byte offset from %p.
%gep.ld = getelementptr i8, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Both loads use the same index `%iv * %stride` (off the different base
; pointers %p0 and %p1), so the two memory accesses can be speculated for
; unit-strideness by a single predicate.
define void @shared_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'shared_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx
; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; A single strided index shared by both loads below.
%idx = mul i64 %iv, %stride
%gep.ld0 = getelementptr i64, ptr %p0, i64 %idx
%gep.ld1 = getelementptr i64, ptr %p1, i64 %idx
%ld0 = load i64, ptr %gep.ld0, align 8
%ld1 = load i64, ptr %gep.ld1, align 8
; The stored value is the sum of the two strided loads.
%val = add i64 %ld0, %ld1
; The store address advances by one i64 element per iteration.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Two loads whose strides depend on each other: %ld0 is indexed by
; %iv * %stride and %ld1 by %iv * (%stride + 1). Their sum is stored to
; %p.out[%iv] for %iv in [0, 128).
; Speculating one access for unit-strideness guarantees that the other one isn't.
define void @dependent_strides(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'dependent_strides'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride1> = add ir<%stride>, ir<1>
; CHECK-NEXT: EMIT ir<%idx0> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx1> = mul ir<%iv>, ir<%stride1>
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx0>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx1>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride1 = add i64 %stride, 1
; CHECK-NEXT: IR %idx0 = mul i64 %iv, %stride
; CHECK-NEXT: IR %idx1 = mul i64 %iv, %stride1
; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0
; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%stride1 = add i64 %stride, 1
%idx0 = mul i64 %iv, %stride
%idx1 = mul i64 %iv, %stride1
%gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0
%gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1
%ld0 = load i64, ptr %gep.ld0, align 8
%ld1 = load i64, ptr %gep.ld1, align 8
%val = add i64 %ld0, %ld1
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Same as @dependent_strides, but with the accesses in the opposite order: the
; first load (%ld0) is indexed by %iv * (%stride + 1) and the second load
; (%ld1) by %iv * %stride. Might be reasonable to have some heuristic to choose
; one over another instead of just speculating the first access.
define void @dependent_strides_reverse_order(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'dependent_strides_reverse_order'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%stride0> = add ir<%stride>, ir<1>
; CHECK-NEXT: EMIT ir<%idx0> = mul ir<%iv>, ir<%stride0>
; CHECK-NEXT: EMIT ir<%idx1> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx0>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx1>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %stride0 = add i64 %stride, 1
; CHECK-NEXT: IR %idx0 = mul i64 %iv, %stride0
; CHECK-NEXT: IR %idx1 = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0
; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%stride0 = add i64 %stride, 1
%idx0 = mul i64 %iv, %stride0
%idx1 = mul i64 %iv, %stride
%gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0
%gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1
%ld0 = load i64, ptr %gep.ld0, align 8
%ld1 = load i64, ptr %gep.ld1, align 8
%val = add i64 %ld0, %ld1
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Two dependent accesses again, but instead of strides being dependent on each
; other, we have the same stride but different access sizes. Both addresses
; are i8 GEPs indexed by %iv * %stride; %ld0 loads an i64 and %ld1 loads an i32
; that is sign-extended to i64 before the add.
define void @byte_dependent_byte_geps(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_dependent_byte_geps'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1.ext> = sext ir<%ld1> to i64
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1.ext>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx
; CHECK-NEXT: IR %ld1 = load i32, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %ld1.ext = sext i32 %ld1 to i64
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1.ext
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
%gep.ld0 = getelementptr i8, ptr %p0, i64 %idx
%ld0 = load i64, ptr %gep.ld0, align 8
%gep.ld1 = getelementptr i8, ptr %p1, i64 %idx
%ld1 = load i32, ptr %gep.ld1, align 8
%ld1.ext = sext i32 %ld1 to i64
%val = add i64 %ld0, %ld1.ext
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Same as @byte_dependent_byte_geps, but with the accesses in reverse order:
; the i32 load (%ld1, sign-extended to i64) comes before the i64 load (%ld0).
define void @byte_dependent_byte_geps_reverse_order(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'byte_dependent_byte_geps_reverse_order'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1.ext> = sext ir<%ld1> to i64
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1.ext>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx
; CHECK-NEXT: IR %ld1 = load i32, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %ld1.ext = sext i32 %ld1 to i64
; CHECK-NEXT: IR %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1.ext
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
%gep.ld1 = getelementptr i8, ptr %p1, i64 %idx
%ld1 = load i32, ptr %gep.ld1, align 8
%ld1.ext = sext i32 %ld1 to i64
%gep.ld0 = getelementptr i8, ptr %p0, i64 %idx
%ld0 = load i64, ptr %gep.ld0, align 8
%val = add i64 %ld0, %ld1.ext
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Interleave group with non-constant stride: each iteration loads the i64 at
; %p[%iv * %stride] and the i64 element right after it, and stores their sum
; to %p.out[%iv]. Probably doesn't make sense to speculate the stride here (as
; a heuristic).
define void @strided_interleave(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'strided_interleave'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%gep.ld0>, ir<1>
; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0>
; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %gep.ld0, i64 1
; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8
; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8
; CHECK-NEXT: IR %val = add i64 %ld0, %ld1
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
%gep.ld0 = getelementptr i64, ptr %p, i64 %idx
; Second member of the group: the i64 element immediately after %gep.ld0.
%gep.ld1 = getelementptr i64, ptr %gep.ld0, i64 1
%ld0 = load i64, ptr %gep.ld0, align 8
%ld1 = load i64, ptr %gep.ld1, align 8
%val = add i64 %ld0, %ld1
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Mem access ptr in the form of `(%base,+,%stride)<%header>` where %base is
; something present as an instruction in the loop body (although that would be
; a SCEV expression, not SCEVUnknown). Here %gep.ld.base = %p + %offset is
; computed inside the loop and the load address is %gep.ld.base indexed by
; %iv * %stride.
; NOTE(review): %idx is computed but not used by any memory access in this
; function - confirm whether it is intentional.
define void @in_loop_base(ptr noalias %p.out, ptr %p, i64 %stride, i64 %offset) {
; CHECK-LABEL: VPlan for loop in 'in_loop_base'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%offset>
; CHECK-NEXT: EMIT ir<%gep.ld.base> = getelementptr ir<%p>, ir<%offset>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%gep.ld.base>, ir<%mul>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %mul = mul i64 %iv, %stride
; CHECK-NEXT: IR %idx = add i64 %mul, %offset
; CHECK-NEXT: IR %gep.ld.base = getelementptr i64, ptr %p, i64 %offset
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %gep.ld.base, i64 %mul
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%mul = mul i64 %iv, %stride
%idx = add i64 %mul, %offset
%gep.ld.base = getelementptr i64, ptr %p, i64 %offset
%gep.ld = getelementptr i64, ptr %gep.ld.base, i64 %mul
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Base (non-recurrent) part of the memory access pointer doesn't have a
; corresponding IR value, e.g., `(base + %iv*%stride) + %offset`. Here the load
; address is a single GEP on %p indexed by %iv * %stride + %offset, so
; `%p + %offset` never appears as its own instruction.
define void @base_not_in_ir(ptr noalias %p.out, ptr %p, i64 %stride, i64 %offset) {
; CHECK-LABEL: VPlan for loop in 'base_not_in_ir'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%offset>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %mul = mul i64 %iv, %stride
; CHECK-NEXT: IR %idx = add i64 %mul, %offset
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%mul = mul i64 %iv, %stride
%idx = add i64 %mul, %offset
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; If the base pointer is uniform but not loop-invariant we could still
; speculate the access to be unit-strided, although that is not implemented
; yet. Here the load index is %iv * %stride + (%iv sdiv 32), so the part of the
; address that does not depend on %stride changes only once every 32
; iterations.
define void @non_invariant_uniform_base(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'non_invariant_uniform_base'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%iv.sdiv32> = sdiv ir<%iv>, ir<32>
; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%iv.sdiv32>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %iv.sdiv32 = sdiv i64 %iv, 32
; CHECK-NEXT: IR %mul = mul i64 %iv, %stride
; CHECK-NEXT: IR %idx = add i64 %mul, %iv.sdiv32
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%iv.sdiv32 = sdiv i64 %iv, 32
%mul = mul i64 %iv, %stride
%idx = add i64 %mul, %iv.sdiv32
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; The stride is not a function argument here: it is loaded inside the loop from
; %p.uni[%iv sdiv 32] and then used as %idx = %iv * %stride, so it is not
; loop-invariant (per the test name it is meant to be uniform - the load
; address changes only once every 32 iterations).
define void @non_invariant_uniform_stride(ptr noalias %p.out, ptr %p, ptr %p.uni) {
; CHECK-LABEL: VPlan for loop in 'non_invariant_uniform_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%iv.sdiv32> = sdiv ir<%iv>, ir<32>
; CHECK-NEXT: EMIT ir<%gep.uni> = getelementptr ir<%p.uni>, ir<%iv.sdiv32>
; CHECK-NEXT: EMIT-SCALAR ir<%stride> = load ir<%gep.uni>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %iv.sdiv32 = sdiv i64 %iv, 32
; CHECK-NEXT: IR %gep.uni = getelementptr i64, ptr %p.uni, i64 %iv.sdiv32
; CHECK-NEXT: IR %stride = load i64, ptr %gep.uni, align 4
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%iv.sdiv32 = sdiv i64 %iv, 32
%gep.uni = getelementptr i64, ptr %p.uni, i64 %iv.sdiv32
; No explicit alignment on this load; the expected output above prints it as
; `align 4`.
%stride = load i64, ptr %gep.uni
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Not valuable by itself, but serves as a baseline for the subsequent tests to
; ensure that a non-constant trip count (here bounded by the %n argument)
; doesn't change anything by itself. Also intended to show the order of checks
; between stride speculation and the trip-count check when not tail-folding.
define void @non_constant_btc(ptr noalias %p.out, ptr %p, i64 %stride, i64 %n) {
; CHECK-LABEL: VPlan for loop in 'non_constant_btc'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax %n)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Load index: the induction variable scaled by the loop-invariant %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; Store is indexed by %iv directly, into the noalias %p.out.
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Loop bound is the %n argument rather than a constant.
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; BTC == stride: the exit condition compares %iv.next against %stride itself
; (the original trip count is expanded from (1 smax %stride)), so stride
; speculation would result in zero vector loop iterations.
define void @stride_as_btc(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'stride_as_btc'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax %stride)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%stride>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %stride
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; %stride scales the load index here ...
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; ... and the same %stride value is also the loop bound.
%exitcond = icmp slt i64 %iv.next, %stride
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Similar to above but with a slightly more complex dependency between stride
; and BTC: the loop bound is %n = %stride + 1, computed in the entry block.
define void @stride_dependent_btc(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'stride_dependent_btc'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %n = add i64 %stride, 1
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (1 + %stride))
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
; Loop bound derived from the stride: %n = %stride + 1.
%n = add i64 %stride, 1
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Load index: the induction variable scaled by %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; BTC dependent on %stride (the loop bound is %n = %m * %stride), but stride
; speculation doesn't necessarily mean no vector loop iterations. The test
; shows in which order we emit the run-time checks for both.
define void @stride_btc_checks_order(ptr noalias %p.out, ptr %p, i64 %stride, i64 %m) {
; CHECK-LABEL: VPlan for loop in 'stride_btc_checks_order'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %n = mul i64 %m, %stride
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (%stride * %m))
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
; Loop bound is the product of the independent %m argument and %stride.
%n = mul i64 %m, %stride
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Load index: the induction variable scaled by %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; BTC fully defined by stride speculation but still allows vector loop
; execution: the loop bound is %n = %stride + 3.
; NOTE(review): presumably a speculated %stride of 1 gives a trip count of 4,
; which matches the vector width of 4 forced on the RUN line - confirm.
define void @stride_dependent_btc_non_preventive(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'stride_dependent_btc_non_preventive'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %n = add i64 %stride, 3
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (3 + %stride))
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
; Loop bound derived from the stride: %n = %stride + 3.
%n = add i64 %stride, 3
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Load index: the induction variable scaled by %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Doesn't pass legality as the run-time memory dependencies check doesn't
; support strided accesses; no CHECK lines are attached to this function. If it
; did, the purpose of this test would be to show how all three run-time checks
; (mem deps/stride speculation/trip-count) would be ordered with respect to
; each other. Note that the pointer aliasing check could potentially be
; simplified if done after stride speculation. However, that isn't necessarily
; the best idea because we could also multi-version for stride and keep the
; aliasing part generic and shared by both vector loops.
define void @stride_btc_memdep_triple_check(ptr %p, i64 %stride, i64 %out.offset) {
entry:
; %p.out is a byte offset from %p, and %p is not marked noalias, so the store
; below may overlap the strided load.
%p.out = getelementptr i8, ptr %p, i64 %out.offset
; Loop bound derived from the stride: %n = %stride + 3.
%n = add i64 %stride, 3
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Load index: the induction variable scaled by %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Same as above but the memdep check doesn't depend on stride: the store goes
; to %p.out, a byte offset from the noalias %p2, and the only other access
; based on %p2 is the load indexed by %iv directly. The %stride-scaled load
; reads from %p.
define void @stride_btc_independent_memdep_triple_check(ptr %p, ptr noalias %p2, i64 %stride, i64 %out.offset) {
; CHECK-LABEL: VPlan for loop in 'stride_btc_independent_memdep_triple_check'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %p.out = getelementptr i8, ptr %p2, i64 %out.offset
; CHECK-NEXT: IR %n = add i64 %stride, 3
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (3 + %stride))
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.ld2> = getelementptr ir<%p2>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%ld2> = load ir<%gep.ld2>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld>, ir<%ld2>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.ld2 = getelementptr i64, ptr %p2, i64 %iv
; CHECK-NEXT: IR %ld2 = load i64, ptr %gep.ld2, align 8
; CHECK-NEXT: IR %val = add i64 %ld, %ld2
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
; Store base is a byte offset from %p2, not from %p.
%p.out = getelementptr i8, ptr %p2, i64 %out.offset
; Loop bound derived from the stride: %n = %stride + 3.
%n = add i64 %stride, 3
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; Strided load from %p: index is the induction variable scaled by %stride.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; Second load from %p2 is indexed by %iv directly, without %stride.
%gep.ld2 = getelementptr i64, ptr %p2, i64 %iv
%ld2 = load i64, ptr %gep.ld2, align 8
%val = add i64 %ld, %ld2
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, %n
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; The load address applies %idx twice through chained GEPs (%base, then
; %gep.ld), so the effective element offset from %p is 2 * %iv * %stride. No
; single IR value holds that combined per-iteration stride.
define void @actual_stride_not_in_ir(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'actual_stride_not_in_ir'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%base> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%base>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %base = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %base, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
; First GEP offsets %p by %idx elements ...
%base = getelementptr i64, ptr %p, i64 %idx
; ... and the second adds %idx again on top of %base.
%gep.ld = getelementptr i64, ptr %base, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; GEP into a multi-dimensional array. The strided last index can be speculated
; to result in a unit-strided memory access.
define void @nd_array_last_idx(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'nd_array_last_idx'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<42>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 42, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
; %idx is the last GEP index, so it steps over individual i64 elements; the
; two leading indices are the constants 1 and 42.
%gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 42, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; A strided inner (non-last) index will never result in a unit-strided memory
; access, even if its stride is one: here %idx selects a [256 x i64] row, so
; consecutive %idx values are 256 i64 elements apart.
define void @nd_array_non_last_idx(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'nd_array_non_last_idx'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<%idx>, ir<42>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 42
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
; %idx is the middle GEP index (row selector); the last index is the
; constant 42.
%gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 42
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Isn't unit-strided either.
define void @nd_array_multiple_idxs(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'nd_array_multiple_idxs'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<%idx>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: %iv runs from 0 to 127 and each iteration copies one i64 into
; %p.out[%iv]. The load address indexes a [256 x [256 x i64]] array and uses
; %idx for both of the inner array dimensions.
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; %idx is %iv scaled by the %stride argument, which never changes in the loop.
%idx = mul i64 %iv, %stride
; The same %idx value is the row index and the column index.
%gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Stride is used through `sext` in the loop.
define void @sext_stride(ptr noalias %p.out, ptr %p, i32 %stride.i32) {
; CHECK-LABEL: VPlan for loop in 'sext_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT-SCALAR ir<%stride> = sext ir<%stride.i32> to i64
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %stride = sext i32 %stride.i32 to i64
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: %iv runs from 0 to 127; each iteration loads the i64 at
; %p[%iv * %stride] and stores it to %p.out[%iv].
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
; The stride arrives as an i32 argument and is sign-extended to i64 here,
; inside the loop body, rather than in the entry block.
%stride = sext i32 %stride.i32 to i64
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Stride is used through `trunc` in the loop.
define void @trunc_stride(ptr noalias %p.out, ptr %p, i64 %stride.i64) {
; CHECK-LABEL: VPlan for loop in 'trunc_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT-SCALAR ir<%stride> = trunc ir<%stride.i64> to i32
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %stride = trunc i64 %stride.i64 to i32
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, 1
; CHECK-NEXT: IR %idx = mul i32 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i32, ptr %p, i32 %idx
; CHECK-NEXT: IR %ld = load i32, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i32, ptr %p.out, i32 %iv
; CHECK-NEXT: IR store i32 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i32 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: same shape as the sign-extension variant, but everything in the
; loop is i32: the induction variable, the index and the loaded/stored value.
entry:
br label %header
header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %header ]
; The stride arrives as an i64 argument and is narrowed to i32 here, inside
; the loop body, rather than in the entry block.
%stride = trunc i64 %stride.i64 to i32
%iv.next = add nsw i32 %iv, 1
%idx = mul i32 %iv, %stride
%gep.ld = getelementptr i32, ptr %p, i32 %idx
%ld = load i32, ptr %gep.ld, align 8
%gep.st = getelementptr i32, ptr %p.out, i32 %iv
store i32 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i32 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; %stride is used through both `trunc`/`sext` for different accesses.
define void @trunc_ext_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i32 %stride) {
; CHECK-LABEL: VPlan for loop in 'trunc_ext_stride'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %stride.trunc = trunc i32 %stride to i16
; CHECK-NEXT: IR %stride.ext = sext i32 %stride to i64
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT-SCALAR ir<%iv.trunc> = trunc ir<%iv> to i16
; CHECK-NEXT: EMIT-SCALAR ir<%iv.ext> = sext ir<%iv> to i64
; CHECK-NEXT: EMIT ir<%idx.trunc> = mul ir<%iv.trunc>, ir<%stride.trunc>
; CHECK-NEXT: EMIT ir<%idx.ext> = mul ir<%iv.ext>, ir<%stride.ext>
; CHECK-NEXT: EMIT ir<%gep.trunc> = getelementptr ir<%p0>, ir<%idx.trunc>
; CHECK-NEXT: EMIT ir<%gep.ext> = getelementptr ir<%p0>, ir<%idx.ext>
; CHECK-NEXT: EMIT-SCALAR ir<%ld.trunc> = load ir<%gep.trunc>
; CHECK-NEXT: EMIT-SCALAR ir<%ld.ext> = load ir<%gep.ext>
; CHECK-NEXT: EMIT ir<%val> = add ir<%ld.trunc>, ir<%ld.ext>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, 1
; CHECK-NEXT: IR %iv.trunc = trunc i32 %iv to i16
; CHECK-NEXT: IR %iv.ext = sext i32 %iv to i64
; CHECK-NEXT: IR %idx.trunc = mul i16 %iv.trunc, %stride.trunc
; CHECK-NEXT: IR %idx.ext = mul i64 %iv.ext, %stride.ext
; CHECK-NEXT: IR %gep.trunc = getelementptr i32, ptr %p0, i16 %idx.trunc
; CHECK-NEXT: IR %gep.ext = getelementptr i32, ptr %p0, i64 %idx.ext
; CHECK-NEXT: IR %ld.trunc = load i32, ptr %gep.trunc, align 4
; CHECK-NEXT: IR %ld.ext = load i32, ptr %gep.ext, align 4
; CHECK-NEXT: IR %val = add i32 %ld.trunc, %ld.ext
; CHECK-NEXT: IR %gep.st = getelementptr i32, ptr %p.out, i32 %iv
; CHECK-NEXT: IR store i32 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i32 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: %iv runs from 0 to 127; each iteration performs two i32 loads
; whose indices are %iv times a narrowed (i16) and a widened (i64) copy of
; %stride, adds the two loaded values and stores the sum to %p.out[%iv].
entry:
; Both conversions of %stride happen once, outside the loop.
%stride.trunc = trunc i32 %stride to i16
%stride.ext = sext i32 %stride to i64
br label %header
header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i32 %iv, 1
; The induction variable is converted to match each stride's width.
%iv.trunc = trunc i32 %iv to i16
%iv.ext = sext i32 %iv to i64
%idx.trunc = mul i16 %iv.trunc, %stride.trunc
%idx.ext = mul i64 %iv.ext, %stride.ext
; NOTE(review): both addresses are based on %p0 and the %p1 argument is never
; referenced; confirm whether %gep.ext was meant to use %p1.
%gep.trunc = getelementptr i32, ptr %p0, i16 %idx.trunc
%gep.ext = getelementptr i32, ptr %p0, i64 %idx.ext
%ld.trunc = load i32, ptr %gep.trunc, align 4
%ld.ext = load i32, ptr %gep.ext, align 4
%val = add i32 %ld.trunc, %ld.ext
%gep.st = getelementptr i32, ptr %p.out, i32 %iv
store i32 %val, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i32 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Check that we don't speculate unit-strided masked memory access if masked wide
; memory operation isn't legal (or that we properly pass the mask if it is).
define void @basic_masked(ptr noalias %p.out, ptr %p, i64 %stride, i64 %x) {
; CHECK-LABEL: VPlan for loop in 'basic_masked'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%c> = icmp sge ir<%iv>, ir<%x>
; CHECK-NEXT: Successor(s): if
; CHECK-EMPTY:
; CHECK-NEXT: if:
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>, ir<%c>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>, ir<%c>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>, ir<%c>
; CHECK-NEXT: Successor(s): latch
; CHECK-EMPTY:
; CHECK-NEXT: latch:
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c>
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = or ir<%c>, vp<[[VP4]]>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>, vp<[[VP5]]>
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP8]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %c = icmp sge i64 %iv, %x
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: %iv runs from 0 to 127. The strided load and the store to
; %p.out[%iv] sit in a conditional block that is entered only when
; %iv >= %x (signed); otherwise the iteration goes straight to the latch.
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
%iv.next = add nsw i64 %iv, 1
; Guard for the memory accesses below.
%c = icmp sge i64 %iv, %x
br i1 %c, label %if, label %latch
if:
; Only reached when %c is true.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
br label %latch
latch:
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; See https://github.com/llvm/llvm-project/issues/162922.
define void @stride_poison(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'stride_poison'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<poison>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, poison
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: same copy loop as the other tests, except that the index
; multiplier is the constant `poison`. The %stride argument is not used.
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; The "stride" operand is a poison constant, not an SSA value.
%idx = mul i64 %iv, poison
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Tests above all used loads, make sure that store is supported too.
define void @basic_strided_store(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'basic_strided_store'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%idx>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %iv
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %idx
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: the roles of the two accesses are swapped relative to the load
; tests. The load reads %p[%iv] and the store writes %p.out[%iv * %stride].
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
; Load address is indexed directly by the induction variable.
%gep.ld = getelementptr i64, ptr %p, i64 %iv
%ld = load i64, ptr %gep.ld, align 8
; Store address is the one that depends on %stride.
%gep.st = getelementptr i64, ptr %p.out, i64 %idx
store i64 %ld, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; This test shows how/if we scalarize address computation def-chain if that
; pointer has other non-scalar uses.
define void @ptr_vec_use(ptr noalias %p.out, ptr noalias %p.ptr.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'ptr_vec_use'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%gep.ptr.st> = getelementptr ir<%p.ptr.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%gep.ld>, ir<%gep.ptr.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8
; CHECK-NEXT: IR %gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv
; CHECK-NEXT: IR store ptr %gep.ld, ptr %gep.ptr.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: besides copying %p[%iv * %stride] to %p.out[%iv], each
; iteration also writes the load address itself to %p.ptr.out[%iv].
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%idx = mul i64 %iv, %stride
; %gep.ld has two users: the load below and the pointer store further down.
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
%gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv
; Here the pointer is the stored value, not the address being accessed.
store ptr %gep.ld, ptr %gep.ptr.st
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Similar to above, but it's not the resulting pointer itself that has
; non-scalar use but something in the middle of its def-chain.
define void @stride_idx_vec_use(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'stride_idx_vec_use'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%val> = mul ir<%ld>, ir<%idx>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %idx = mul i64 %iv, %stride
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %val = mul i64 %ld, %idx
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
; Input loop: loads %p[%idx] with %idx = %iv * %stride, multiplies the loaded
; value by %idx and stores the product to %p.out[%iv].
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
; %idx has two users: the address computation and the %val multiply.
%idx = mul i64 %iv, %stride
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; Data use of the index, independent of the address computation.
%val = mul i64 %ld, %idx
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Another variation of the above, even longer def-chain.
define void @offset_stride_idx_vec_use(ptr noalias %p.out, ptr %p, i64 %stride) {
; CHECK-LABEL: VPlan for loop in 'offset_stride_idx_vec_use'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<128> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%iv.times.stride> = mul ir<%iv>, ir<%stride>
; CHECK-NEXT: EMIT ir<%idx> = add ir<%iv.times.stride>, ir<42>
; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx>
; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>
; CHECK-NEXT: EMIT ir<%val> = mul ir<%ld>, ir<%idx>
; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st>
; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<header>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<header>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1
; CHECK-NEXT: IR %iv.times.stride = mul i64 %iv, %stride
; CHECK-NEXT: IR %idx = add i64 %iv.times.stride, 42
; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx
; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8
; CHECK-NEXT: IR %val = mul i64 %ld, %idx
; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv
; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8
; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-EMPTY:
; CHECK-NEXT: remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop
;
; Input loop: the index is %iv * %stride + 42, so the chain from the stride
; to the address is one instruction longer than in the previous test. %idx is
; again used both for the load address and as an operand of %val.
; NOTE(review): the remark expected at the very end of the assertions above
; presumably comes from the following function, which produces no VPlan dump;
; confirm when regenerating the assertions.
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
%iv.next = add nsw i64 %iv, 1
%iv.times.stride = mul i64 %iv, %stride
; Constant offset added on top of the strided part of the index.
%idx = add i64 %iv.times.stride, 42
%gep.ld = getelementptr i64, ptr %p, i64 %idx
%ld = load i64, ptr %gep.ld, align 8
; Data use of the index, independent of the address computation.
%val = mul i64 %ld, %idx
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %val, ptr %gep.st, align 8
; Stay in the loop while %iv.next is (signed) less than 128.
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; No VPlan dump because `%gep.ld` phi doesn't pass legality currently.
;
; The loop counts %iv up from 0 in steps of 1 and keeps iterating while
; %iv.next is (signed) less than 128. %gep.ld is a pointer phi that starts at
; %p and advances by %stride i64 elements per iteration. It is used both as
; the address of the load and as a value that is itself stored to
; %p.ptr.out[%iv].
; NOTE(review): the "loop not vectorized" remark expected at the end of the
; preceding function's assertions presumably originates from this loop -
; confirm.
define void @base_ptr_induction_vec_use(ptr noalias %p.out, ptr noalias %p.ptr.out, ptr %p, i64 %stride) {
entry:
br label %header
header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
; Pointer recurrence: begins at %p, bumped by %stride elements each iteration.
%gep.ld = phi ptr [ %p, %entry ], [ %gep.ld.next, %header ]
%iv.next = add nsw i64 %iv, 1
%gep.ld.next = getelementptr inbounds i64, ptr %gep.ld, i64 %stride
%ld = load i64, ptr %gep.ld, align 8
%gep.st = getelementptr i64, ptr %p.out, i64 %iv
store i64 %ld, ptr %gep.st, align 8
; The pointer phi is also stored as data, not only dereferenced.
%gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv
store ptr %gep.ld, ptr %gep.ptr.st
%exitcond = icmp slt i64 %iv.next, 128
br i1 %exitcond, label %header, label %exit
exit:
ret void
}
; Loop with two integer inductions: %iv.0 starts at zext(%start) and steps by
; 1, while %iv.1 starts at 0 and steps by the loop-invariant %start.ext. Each
; iteration stores float 0.0 to %dst[%iv.1]; the loop exits once %iv.0.next
; equals 100. The expected plan below expands both the trip count and the
; %iv.1 step as SCEV expressions in the entry block.
; NOTE(review): judging by the name, this presumably guards the rewriting of
; induction SCEVs - confirm against the change that introduced the test.
define void @test_rewrite_iv_scevs(i32 %start, ptr %dst) {
; CHECK-LABEL: VPlan for loop in 'test_rewrite_iv_scevs'
; CHECK: VPlan ' for UF>=1' {
; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %start.ext = zext i32 %start to i64
; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (100 + (-1 * (zext i32 %start to i64))<nsw>)<nsw>
; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = EXPAND SCEV (zext i32 %start to i64)
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: ir<%iv.0> = WIDEN-INDUCTION ir<%start.ext>, ir<1>, vp<[[VP0]]>
; CHECK-NEXT: ir<%iv.1> = WIDEN-INDUCTION ir<0>, vp<[[VP4]]>, vp<[[VP0]]>
; CHECK-NEXT: EMIT ir<%gep.dst> = getelementptr ir<%dst>, ir<%iv.1>
; CHECK-NEXT: EMIT store ir<0.000000e+00>, ir<%gep.dst>
; CHECK-NEXT: EMIT ir<%iv.1.next> = add ir<%iv.1>, vp<[[VP4]]>
; CHECK-NEXT: EMIT ir<%iv.0.next> = add ir<%iv.0>, ir<1>
; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.0.next>, ir<100>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv.0>
; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = exiting-iv-value ir<%iv.1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<%start.ext>, ir-bb<entry> ]
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val>.1 = phi [ vp<[[VP8]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
; CHECK-NEXT: IR %iv.0 = phi i64 [ %start.ext, %entry ], [ %iv.0.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] (extra operand: vp<%bc.resume.val>.1 from scalar.ph)
; CHECK-NEXT: IR %gep.dst = getelementptr float, ptr %dst, i64 %iv.1
; CHECK-NEXT: IR store float 0.000000e+00, ptr %gep.dst, align 4
; CHECK-NEXT: IR %iv.1.next = add i64 %iv.1, %start.ext
; CHECK-NEXT: IR %iv.0.next = add i64 %iv.0, 1
; CHECK-NEXT: IR %ec = icmp eq i64 %iv.0.next, 100
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
%start.ext = zext i32 %start to i64
br label %loop
loop:
; %iv.0 controls the exit; %iv.1 only feeds the store address.
%iv.0 = phi i64 [ %start.ext, %entry ], [ %iv.0.next, %loop ]
%iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
%gep.dst = getelementptr float, ptr %dst, i64 %iv.1
store float 0.0, ptr %gep.dst, align 4
; The step of %iv.1 is loop-invariant but not a compile-time constant.
%iv.1.next = add i64 %iv.1, %start.ext
%iv.0.next = add i64 %iv.0, 1
%ec = icmp eq i64 %iv.0.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Keep this in sync with the same under LoopVectorize/