| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt < %s -p loop-vectorize -force-vector-width=4 -disable-output \ |
| ; RUN: -vplan-print-after=scalarizeMemOpsWithIrregularTypes \ |
| ; RUN: -enable-mem-access-versioning=false 2>&1 | FileCheck %s |
| |
| ; Baseline case: each iteration loads an i64 from %p at element index |
| ; `%iv * %stride` (%stride is a function argument) and stores it to %p.out at |
| ; index %iv, for 128 iterations. The checks below match the VPlan printed after |
| ; the transform named in the RUN line; the strided load is expected to be |
| ; printed as EMIT-SCALAR. |
| define void @basic(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'basic' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Load index: the induction variable scaled by the runtime %stride. |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Same loop as @basic, but the function carries attribute group #0 (optsize). |
| ; The VPlan expected below is line-for-line the same as the one checked for |
| ; @basic. |
| define void @basic_optsize(ptr noalias %p.out, ptr %p, i64 %stride) #0 { |
| ; CHECK-LABEL: VPlan for loop in 'basic_optsize' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Load index: the induction variable scaled by the runtime %stride. |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Attribute group referenced by @basic_optsize. |
| attributes #0 = { optsize } |
| |
| ; Same loop as @basic, but the function carries attribute group #1 (minsize). |
| ; The VPlan expected below is line-for-line the same as the one checked for |
| ; @basic. |
| define void @basic_minsize(ptr noalias %p.out, ptr %p, i64 %stride) #1 { |
| ; CHECK-LABEL: VPlan for loop in 'basic_minsize' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Load index: the induction variable scaled by the runtime %stride. |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Attribute group referenced by @basic_minsize. |
| attributes #1 = { minsize } |
| |
| ; With a byte GEP and a wider memory access, the access is unit-strided when the |
| ; GEP index advances by the access type's size in bytes rather than by one. This |
| ; test uses a constant multiplier equal to exactly that size (`8 * %stride`), so |
| ; `%stride == 1` would result in a unit-strided load. |
| define void @byte_gep_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_scaled_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<8> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 8 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Byte offset is %iv * (%stride * 8); 8 is the byte size of the loaded i64. |
| %stride.x8 = mul i64 %stride, 8 |
| %idx = mul i64 %iv, %stride.x8 |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Similar to above but constant multiplier is smaller than load type's width, so |
| ; unit-strideness would require `%stride == 2`. |
| define void @byte_gep_under_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_under_scaled_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<4> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 4 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; NOTE: despite its name, %stride.x8 multiplies %stride by 4 in this test. |
| ; The name also appears in the CHECK lines above, so keep both in sync. |
| %stride.x8 = mul i64 %stride, 4 |
| %idx = mul i64 %iv, %stride.x8 |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Another variation for constant multiplier with byte gep. This time the |
| ; multiplier is bigger than the load access type's size, so this cannot be |
| ; speculated for unit-strideness. |
| define void @byte_gep_over_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_over_scaled_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<16> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 16 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; NOTE: despite its name, %stride.x8 multiplies %stride by 16 in this test. |
| ; The name also appears in the CHECK lines above, so keep both in sync. |
| %stride.x8 = mul i64 %stride, 16 |
| %idx = mul i64 %iv, %stride.x8 |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; And another one, with multiplier non-power-of-two. Cannot be speculated for |
| ; unit-strideness. |
| define void @byte_gep_non_power_of_two_scaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_non_power_of_two_scaled_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride.x8> = mul ir<%stride>, ir<11> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.x8> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride.x8 = mul i64 %stride, 11 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.x8 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; NOTE: despite its name, %stride.x8 multiplies %stride by 11 in this test. |
| ; The name also appears in the CHECK lines above, so keep both in sync. |
| %stride.x8 = mul i64 %stride, 11 |
| %idx = mul i64 %iv, %stride.x8 |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; No constant multiplier, need to speculate `%stride == sizeof(load-access-type)`. |
| define void @byte_gep_nonscaled_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_nonscaled_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; %idx is %iv * %stride with no constant scale; it feeds the byte GEP below. |
| %idx = mul i64 %iv, %stride |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; If %stride is negated before indexing, that might be a good heuristic for |
| ; avoiding stride speculation... At the very least, keep this test for |
| ; documentation purposes. |
| define void @byte_gep_negated_stride(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_gep_negated_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride.neg> = sub ir<0>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride.neg> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride.neg = sub i64 0, %stride |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride.neg |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; The index uses the negated stride: %idx = %iv * (0 - %stride). |
| %stride.neg = sub i64 0, %stride |
| %idx = mul i64 %iv, %stride.neg |
| |
| ; The load address is formed with an i8 (byte) GEP, but an i64 is loaded. |
| %gep.ld = getelementptr i8, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv through an i64 GEP. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next < 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Two memory accesses can be speculated for unit-strideness by single predicate. |
| define void @shared_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'shared_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: two i64 loads, from %p0 and %p1 at the same index, are added |
| ; and the sum is stored to %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; A single index, %iv * %stride, is used by both load addresses below. |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx |
| %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| %ld1 = load i64, ptr %gep.ld1, align 8 |
| %val = add i64 %ld0, %ld1 |
| |
| ; The store address is indexed by %iv alone, with no %stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Speculating one access for unit-strideness guarantees that the other one isn't. |
| define void @dependent_strides(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'dependent_strides' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride1> = add ir<%stride>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx0> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx1> = mul ir<%iv>, ir<%stride1> |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx0> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride1 = add i64 %stride, 1 |
| ; CHECK-NEXT: IR %idx0 = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %idx1 = mul i64 %iv, %stride1 |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0 |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1 |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: %p0 is indexed by %iv * %stride and %p1 by %iv * (%stride + 1); |
| ; the two loaded i64 values are added and stored to %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; The second stride is the first plus one, so the two strides always differ. |
| %stride1 = add i64 %stride, 1 |
| %idx0 = mul i64 %iv, %stride |
| %idx1 = mul i64 %iv, %stride1 |
| |
| ; First load uses %stride, second load uses %stride + 1. |
| %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0 |
| %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1 |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| %ld1 = load i64, ptr %gep.ld1, align 8 |
| %val = add i64 %ld0, %ld1 |
| |
| ; The store address is indexed by %iv alone, with no stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Same as above but in different order. Might be reasonable to have some |
| ; heuristic to choose one over another instead of just speculating the first |
| ; access. |
| define void @dependent_strides_reverse_order(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'dependent_strides_reverse_order' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%stride0> = add ir<%stride>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx0> = mul ir<%iv>, ir<%stride0> |
| ; CHECK-NEXT: EMIT ir<%idx1> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx0> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %stride0 = add i64 %stride, 1 |
| ; CHECK-NEXT: IR %idx0 = mul i64 %iv, %stride0 |
| ; CHECK-NEXT: IR %idx1 = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0 |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1 |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: %p0 is indexed by %iv * (%stride + 1) and %p1 by %iv * %stride; |
| ; the two loaded i64 values are added and stored to %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Here the first load's index uses %stride + 1 and the second uses %stride. |
| %stride0 = add i64 %stride, 1 |
| %idx0 = mul i64 %iv, %stride0 |
| %idx1 = mul i64 %iv, %stride |
| |
| %gep.ld0 = getelementptr i64, ptr %p0, i64 %idx0 |
| %gep.ld1 = getelementptr i64, ptr %p1, i64 %idx1 |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| %ld1 = load i64, ptr %gep.ld1, align 8 |
| %val = add i64 %ld0, %ld1 |
| |
| ; The store address is indexed by %iv alone, with no stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Two dependent accesses again, but instead of strides being dependent on each |
| ; other, we have the same stride but different access sizes. |
| define void @byte_dependent_byte_geps(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_dependent_byte_geps' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1.ext> = sext ir<%ld1> to i64 |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1.ext> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx |
| ; CHECK-NEXT: IR %ld1 = load i32, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %ld1.ext = sext i32 %ld1 to i64 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1.ext |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: both load addresses are byte (i8) GEPs with the same index |
| ; %iv * %stride, but the first load reads an i64 and the second reads an i32. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Shared byte offset for both loads. |
| %idx = mul i64 %iv, %stride |
| |
| ; First access: i64 load at byte offset %idx from %p0. |
| %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| |
| ; Second access: i32 load at byte offset %idx from %p1, sign-extended to i64 |
| ; so it can be added to %ld0. |
| %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx |
| %ld1 = load i32, ptr %gep.ld1, align 8 |
| %ld1.ext = sext i32 %ld1 to i64 |
| |
| %val = add i64 %ld0, %ld1.ext |
| |
| ; The store address is indexed by %iv alone, in i64 elements. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Likewise but reverse order of accesses. |
| define void @byte_dependent_byte_geps_reverse_order(ptr noalias %p.out, ptr %p0, ptr %p1, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'byte_dependent_byte_geps_reverse_order' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%p1>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1.ext> = sext ir<%ld1> to i64 |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p0>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1.ext> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx |
| ; CHECK-NEXT: IR %ld1 = load i32, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %ld1.ext = sext i32 %ld1 to i64 |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1.ext |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: both load addresses are byte (i8) GEPs with the same index |
| ; %iv * %stride; the i32 load comes first here and the i64 load second. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Shared byte offset for both loads. |
| %idx = mul i64 %iv, %stride |
| |
| ; First access: i32 load at byte offset %idx from %p1, sign-extended to i64. |
| %gep.ld1 = getelementptr i8, ptr %p1, i64 %idx |
| %ld1 = load i32, ptr %gep.ld1, align 8 |
| %ld1.ext = sext i32 %ld1 to i64 |
| |
| ; Second access: i64 load at byte offset %idx from %p0. |
| %gep.ld0 = getelementptr i8, ptr %p0, i64 %idx |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| |
| %val = add i64 %ld0, %ld1.ext |
| |
| ; The store address is indexed by %iv alone, in i64 elements. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| |
| ; Interleave group with non-constant stride. Probably doesn't make sense to |
| ; speculate the stride here (as a heuristic). |
| define void @strided_interleave(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'strided_interleave' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld0> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT ir<%gep.ld1> = getelementptr ir<%gep.ld0>, ir<1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld0> = load ir<%gep.ld0> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld1> = load ir<%gep.ld1> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld0>, ir<%ld1> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld0 = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %gep.ld1 = getelementptr i64, ptr %gep.ld0, i64 1 |
| ; CHECK-NEXT: IR %ld0 = load i64, ptr %gep.ld0, align 8 |
| ; CHECK-NEXT: IR %ld1 = load i64, ptr %gep.ld1, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld0, %ld1 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: two adjacent i64 elements are loaded starting at index |
| ; %iv * %stride of %p, added, and the sum is stored to %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| ; %gep.ld1 is one i64 element past %gep.ld0, so the two loads read adjacent |
| ; elements. |
| %gep.ld0 = getelementptr i64, ptr %p, i64 %idx |
| %gep.ld1 = getelementptr i64, ptr %gep.ld0, i64 1 |
| %ld0 = load i64, ptr %gep.ld0, align 8 |
| %ld1 = load i64, ptr %gep.ld1, align 8 |
| %val = add i64 %ld0, %ld1 |
| |
| ; The store address is indexed by %iv alone, with no %stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| |
| ; Mem access ptr in the form of `(%base,+,%stride)<%header>` where %base is |
| ; something present as an instruction in the loop body (although that would be a |
| ; SCEV expression, not SCEVUnknown). |
| define void @in_loop_base(ptr noalias %p.out, ptr %p, i64 %stride, i64 %offset) { |
| ; CHECK-LABEL: VPlan for loop in 'in_loop_base' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%offset> |
| ; CHECK-NEXT: EMIT ir<%gep.ld.base> = getelementptr ir<%p>, ir<%offset> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%gep.ld.base>, ir<%mul> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %mul = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %idx = add i64 %mul, %offset |
| ; CHECK-NEXT: IR %gep.ld.base = getelementptr i64, ptr %p, i64 %offset |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %gep.ld.base, i64 %mul |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: the load address is built in two GEP steps inside the loop, |
| ; %p + %offset first and then + %iv * %stride; the loaded i64 is stored to |
| ; %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %mul = mul i64 %iv, %stride |
| ; %idx is not used by any later instruction in this loop. |
| %idx = add i64 %mul, %offset |
| |
| ; The base GEP (%p + %offset) is an instruction in the loop body; the strided |
| ; part (%mul) is applied on top of it. |
| %gep.ld.base = getelementptr i64, ptr %p, i64 %offset |
| %gep.ld = getelementptr i64, ptr %gep.ld.base, i64 %mul |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store address is indexed by %iv alone, with no %stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Base (non-recurrent) part of the memory access pointer doesn't have |
| ; corresponding IR value, e.g., `(base + %iv*%stride) + %offset`. |
| define void @base_not_in_ir(ptr noalias %p.out, ptr %p, i64 %stride, i64 %offset) { |
| ; CHECK-LABEL: VPlan for loop in 'base_not_in_ir' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%offset> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %mul = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %idx = add i64 %mul, %offset |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| ; Loop summary: one i64 load from %p at index %iv * %stride + %offset, stored |
| ; to %p.out at index %iv. |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; %offset is added to the strided index before the single GEP below, so no |
| ; instruction in this loop computes %p + %offset on its own. |
| %mul = mul i64 %iv, %stride |
| %idx = add i64 %mul, %offset |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store address is indexed by %iv alone, with no %stride factor. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Stay in the loop while %iv.next is below 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; If the base pointer is uniform but not loop-invariant we could still speculate |
| ; the access to be unit-strided, although that is not implemented yet. |
| ; |
| ; The load index is %iv * %stride + %iv / 32. With the vector width forced to 4 |
| ; by the RUN line, the %iv / 32 term has the same value for all lanes of one |
| ; vector iteration, but it still changes as the loop progresses. |
| define void @non_invariant_uniform_base(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'non_invariant_uniform_base' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%iv.sdiv32> = sdiv ir<%iv>, ir<32> |
| ; CHECK-NEXT: EMIT ir<%mul> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx> = add ir<%mul>, ir<%iv.sdiv32> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %iv.sdiv32 = sdiv i64 %iv, 32 |
| ; CHECK-NEXT: IR %mul = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %idx = add i64 %mul, %iv.sdiv32 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %iv.sdiv32 = sdiv i64 %iv, 32 |
| |
| %mul = mul i64 %iv, %stride |
| %idx = add i64 %mul, %iv.sdiv32 |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Here the stride itself is not loop-invariant: %stride is loaded inside the |
| ; loop from %p.uni[%iv / 32], and the strided load reads %p[%iv * %stride]. |
| ; NOTE(review): the %stride load carries no explicit alignment, which is |
| ; presumably why the expected IR shows "align 4" for it while the other loads |
| ; show "align 8" - confirm that this is intentional. |
| define void @non_invariant_uniform_stride(ptr noalias %p.out, ptr %p, ptr %p.uni) { |
| ; CHECK-LABEL: VPlan for loop in 'non_invariant_uniform_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%iv.sdiv32> = sdiv ir<%iv>, ir<32> |
| ; CHECK-NEXT: EMIT ir<%gep.uni> = getelementptr ir<%p.uni>, ir<%iv.sdiv32> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%stride> = load ir<%gep.uni> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %iv.sdiv32 = sdiv i64 %iv, 32 |
| ; CHECK-NEXT: IR %gep.uni = getelementptr i64, ptr %p.uni, i64 %iv.sdiv32 |
| ; CHECK-NEXT: IR %stride = load i64, ptr %gep.uni, align 4 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %iv.sdiv32 = sdiv i64 %iv, 32 |
| |
| %gep.uni = getelementptr i64, ptr %p.uni, i64 %iv.sdiv32 |
| %stride = load i64, ptr %gep.uni |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Not valuable by itself, but serves as a baseline for the subsequent test to |
| ; ensure that a non-constant trip count doesn't change anything on its own. Also |
| ; shows the order of checks between stride speculation and the trip-count check |
| ; when not tail-folding. |
| ; |
| ; The loop bound is the %n argument, so the expected plan expands the original |
| ; trip count from the SCEV (1 smax %n) in the entry block instead of using a |
| ; constant live-in. |
| define void @non_constant_btc(ptr noalias %p.out, ptr %p, i64 %stride, i64 %n) { |
| ; CHECK-LABEL: VPlan for loop in 'non_constant_btc' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax %n) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; BTC == stride, so stride speculation would result in zero vector loop |
| ; iterations. |
| ; |
| ; %stride is both the multiplier of the load index and the loop bound in the |
| ; exit compare, so the expected trip count is the SCEV (1 smax %stride). |
| define void @stride_as_btc(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_as_btc' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax %stride) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%stride> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %stride |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %stride |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Similar to above but with a slightly more complex dependency between stride |
| ; and BTC: the loop bound is %n = %stride + 1, giving the expected trip-count |
| ; SCEV (1 smax (1 + %stride)). |
| define void @stride_dependent_btc(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_dependent_btc' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %n = add i64 %stride, 1 |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (1 + %stride)) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| %n = add i64 %stride, 1 |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; BTC dependent on %stride, but stride speculation doesn't necessarily mean no |
| ; vector loop iterations. The test shows in which order we emit the run-time |
| ; checks for both. |
| ; |
| ; The loop bound is %n = %m * %stride, so it also depends on the independent |
| ; argument %m. |
| define void @stride_btc_checks_order(ptr noalias %p.out, ptr %p, i64 %stride, i64 %m) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_btc_checks_order' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %n = mul i64 %m, %stride |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (%stride * %m)) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| %n = mul i64 %m, %stride |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; BTC fully defined by stride speculation but still allows vector loop execution. |
| ; |
| ; The loop bound is %n = %stride + 3. |
| ; NOTE(review): presumably "non preventive" because a stride of 1 would give a |
| ; trip count of 4, which matches the vector width forced by the RUN line - |
| ; confirm. |
| define void @stride_dependent_btc_non_preventive(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_dependent_btc_non_preventive' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %n = add i64 %stride, 3 |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (3 + %stride)) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| %n = add i64 %stride, 3 |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Doesn't pass legality as run-time memory dependencies check doesn't support |
| ; strided accesses. If it did, the purpose of this test would be to show how all |
| ; three run-time checks (mem deps/stride speculation/trip-count) would be |
| ; ordered with respect to each other. Note that pointer aliasing check could |
| ; potentially be simplified if done after stride speculation. However, that |
| ; isn't necessarily the best idea because we could also multi-version for stride |
| ; and keep aliasing part generic and shared by both vector loops. |
| ; |
| ; %p.out is %p advanced by %out.offset bytes and neither pointer is noalias, so |
| ; the store may overlap the strided load. No assertions are attached to this |
| ; function. |
| define void @stride_btc_memdep_triple_check(ptr %p, i64 %stride, i64 %out.offset) { |
| entry: |
| %p.out = getelementptr i8, ptr %p, i64 %out.offset |
| %n = add i64 %stride, 3 |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Same as above but the memdep check doesn't depend on stride: the store |
| ; pointer %p.out is derived from the noalias argument %p2, which is also read |
| ; with the unit-stride index %iv, while the strided load still goes through %p. |
| define void @stride_btc_independent_memdep_triple_check(ptr %p, ptr noalias %p2, i64 %stride, i64 %out.offset) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_btc_independent_memdep_triple_check' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %p.out = getelementptr i8, ptr %p2, i64 %out.offset |
| ; CHECK-NEXT: IR %n = add i64 %stride, 3 |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (1 smax (3 + %stride)) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.ld2> = getelementptr ir<%p2>, ir<%iv> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld2> = load ir<%gep.ld2> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld>, ir<%ld2> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<%n> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP4]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.ld2 = getelementptr i64, ptr %p2, i64 %iv |
| ; CHECK-NEXT: IR %ld2 = load i64, ptr %gep.ld2, align 8 |
| ; CHECK-NEXT: IR %val = add i64 %ld, %ld2 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, %n |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| %p.out = getelementptr i8, ptr %p2, i64 %out.offset |
| %n = add i64 %stride, 3 |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.ld2 = getelementptr i64, ptr %p2, i64 %iv |
| %ld2 = load i64, ptr %gep.ld2, align 8 |
| |
| %val = add i64 %ld, %ld2 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, %n |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The load address is built from two chained GEPs that both add %idx, so the |
| ; effective element stride of the load is 2 * %stride. That value does not |
| ; exist as an SSA value anywhere in the IR. |
| define void @actual_stride_not_in_ir(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'actual_stride_not_in_ir' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%base> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%base>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %base = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %base, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| ; %idx is applied twice: once to form %base and once more on top of %base. |
| %base = getelementptr i64, ptr %p, i64 %idx |
| %gep.ld = getelementptr i64, ptr %base, i64 %idx |
| |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store is indexed directly by %iv. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; GEP into a multi-dimensional array. The strided value is the last index, so |
| ; speculating a stride of one would result in a unit-strided memory access. |
| define void @nd_array_last_idx(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'nd_array_last_idx' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<42>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 42, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| ; The leading indices are the constants 1 and 42; only the last (i64 element) |
| ; index varies with the loop. |
| %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 42, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| |
| ; A strided non-last index will never result in a unit-strided memory access, |
| ; even if its stride is one: here each step of %idx moves the address by a |
| ; whole [256 x i64] row rather than by a single i64 element. |
| define void @nd_array_non_last_idx(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'nd_array_non_last_idx' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<%idx>, ir<42> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 42 |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| ; %idx is the middle index (selects a [256 x i64] row); the last index is the |
| ; constant 42. |
| %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 42 |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The same strided value is used for both the row index and the element index |
| ; of the GEP. This isn't unit-strided either. |
| define void @nd_array_multiple_idxs(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'nd_array_multiple_idxs' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<1>, ir<%idx>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| ; %idx appears twice: as the row index and as the element index. |
| %gep.ld = getelementptr [256 x [256 x i64]], ptr %p, i64 1, i64 %idx, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The stride is an i32 argument that is used through a `sext` to i64, and the |
| ; `sext` itself is located inside the loop header. |
| define void @sext_stride(ptr noalias %p.out, ptr %p, i32 %stride.i32) { |
| ; CHECK-LABEL: VPlan for loop in 'sext_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%stride> = sext ir<%stride.i32> to i64 |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %stride = sext i32 %stride.i32 to i64 |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| ; The extension of the loop-invariant stride lives in the loop body. |
| %stride = sext i32 %stride.i32 to i64 |
| %iv.next = add nsw i64 %iv, 1 |
| |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; The stride is an i64 argument that is used through a `trunc` to i32, and the |
| ; `trunc` itself is located inside the loop header. The induction variable and |
| ; the accessed elements are i32 in this test. |
| define void @trunc_stride(ptr noalias %p.out, ptr %p, i64 %stride.i64) { |
| ; CHECK-LABEL: VPlan for loop in 'trunc_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%stride> = trunc ir<%stride.i64> to i32 |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %stride = trunc i64 %stride.i64 to i32 |
| ; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i32 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i32, ptr %p, i32 %idx |
| ; CHECK-NEXT: IR %ld = load i32, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i32, ptr %p.out, i32 %iv |
| ; CHECK-NEXT: IR store i32 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i32 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] |
| ; The truncation of the loop-invariant stride lives in the loop body. |
| %stride = trunc i64 %stride.i64 to i32 |
| %iv.next = add nsw i32 %iv, 1 |
| |
| %idx = mul i32 %iv, %stride |
| |
| %gep.ld = getelementptr i32, ptr %p, i32 %idx |
| %ld = load i32, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i32, ptr %p.out, i32 %iv |
| store i32 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i32 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; %stride is used through both `trunc` and `sext` for different accesses: one |
| ; load is indexed by an i16 product, the other by an i64 product. Both casts of |
| ; %stride are computed in the entry block, outside the loop. |
| ; NOTE(review): %p1 is never referenced; both loads address through %p0. |
| ; Confirm whether %gep.ext was meant to use %p1 (the CHECK lines would need to |
| ; be regenerated if that is changed). |
| define void @trunc_ext_stride(ptr noalias %p.out, ptr %p0, ptr %p1, i32 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'trunc_ext_stride' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %stride.trunc = trunc i32 %stride to i16 |
| ; CHECK-NEXT: IR %stride.ext = sext i32 %stride to i64 |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%iv.trunc> = trunc ir<%iv> to i16 |
| ; CHECK-NEXT: EMIT-SCALAR ir<%iv.ext> = sext ir<%iv> to i64 |
| ; CHECK-NEXT: EMIT ir<%idx.trunc> = mul ir<%iv.trunc>, ir<%stride.trunc> |
| ; CHECK-NEXT: EMIT ir<%idx.ext> = mul ir<%iv.ext>, ir<%stride.ext> |
| ; CHECK-NEXT: EMIT ir<%gep.trunc> = getelementptr ir<%p0>, ir<%idx.trunc> |
| ; CHECK-NEXT: EMIT ir<%gep.ext> = getelementptr ir<%p0>, ir<%idx.ext> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld.trunc> = load ir<%gep.trunc> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld.ext> = load ir<%gep.ext> |
| ; CHECK-NEXT: EMIT ir<%val> = add ir<%ld.trunc>, ir<%ld.ext> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, 1 |
| ; CHECK-NEXT: IR %iv.trunc = trunc i32 %iv to i16 |
| ; CHECK-NEXT: IR %iv.ext = sext i32 %iv to i64 |
| ; CHECK-NEXT: IR %idx.trunc = mul i16 %iv.trunc, %stride.trunc |
| ; CHECK-NEXT: IR %idx.ext = mul i64 %iv.ext, %stride.ext |
| ; CHECK-NEXT: IR %gep.trunc = getelementptr i32, ptr %p0, i16 %idx.trunc |
| ; CHECK-NEXT: IR %gep.ext = getelementptr i32, ptr %p0, i64 %idx.ext |
| ; CHECK-NEXT: IR %ld.trunc = load i32, ptr %gep.trunc, align 4 |
| ; CHECK-NEXT: IR %ld.ext = load i32, ptr %gep.ext, align 4 |
| ; CHECK-NEXT: IR %val = add i32 %ld.trunc, %ld.ext |
| ; CHECK-NEXT: IR %gep.st = getelementptr i32, ptr %p.out, i32 %iv |
| ; CHECK-NEXT: IR store i32 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i32 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| ; Narrowed (i16) and widened (i64) copies of the same i32 stride. |
| %stride.trunc = trunc i32 %stride to i16 |
| %stride.ext = sext i32 %stride to i64 |
| br label %header |
| |
| header: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i32 %iv, 1 |
| |
| ; The induction variable is cast to match each stride copy's type. |
| %iv.trunc = trunc i32 %iv to i16 |
| %iv.ext = sext i32 %iv to i64 |
| |
| %idx.trunc = mul i16 %iv.trunc, %stride.trunc |
| %idx.ext = mul i64 %iv.ext, %stride.ext |
| |
| %gep.trunc = getelementptr i32, ptr %p0, i16 %idx.trunc |
| %gep.ext = getelementptr i32, ptr %p0, i64 %idx.ext |
| |
| %ld.trunc = load i32, ptr %gep.trunc, align 4 |
| %ld.ext = load i32, ptr %gep.ext, align 4 |
| |
| ; The sum of both loaded values is stored to %p.out indexed directly by %iv. |
| %val = add i32 %ld.trunc, %ld.ext |
| |
| %gep.st = getelementptr i32, ptr %p.out, i32 %iv |
| store i32 %val, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i32 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Check that we don't speculate unit-strided masked memory access if masked wide |
| ; memory operation isn't legal (or that we properly pass the mask if it is). |
| ; The strided load and the store sit in a conditional block guarded by %c, and |
| ; in the printed VPlan they carry ir<%c> as an extra (mask) operand. |
| define void @basic_masked(ptr noalias %p.out, ptr %p, i64 %stride, i64 %x) { |
| ; CHECK-LABEL: VPlan for loop in 'basic_masked' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%c> = icmp sge ir<%iv>, ir<%x> |
| ; CHECK-NEXT: Successor(s): if |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: if: |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride>, ir<%c> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld>, ir<%c> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st>, ir<%c> |
| ; CHECK-NEXT: Successor(s): latch |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: latch: |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%c> |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = or ir<%c>, vp<[[VP4]]> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128>, vp<[[VP5]]> |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP8]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %c = icmp sge i64 %iv, %x |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] |
| %iv.next = add nsw i64 %iv, 1 |
| ; The memory accesses below only execute on iterations where %iv >= %x. |
| %c = icmp sge i64 %iv, %x |
| br i1 %c, label %if, label %latch |
| |
| if: |
| %idx = mul i64 %iv, %stride |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| br label %latch |
| |
| latch: |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; See https://github.com/llvm/llvm-project/issues/162922. |
| ; The multiplier that forms the load index is the literal `poison` constant; |
| ; the %stride argument is not used in the function body. |
| define void @stride_poison(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_poison' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<poison> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, poison |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; The "stride" operand is poison rather than an SSA value. |
| %idx = mul i64 %iv, poison |
| |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Tests above all used loads; make sure that a store is supported too. |
| ; Here the load address (%gep.ld) is indexed directly by %iv, while the store |
| ; address (%gep.st) is indexed by %idx = %iv * %stride. |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file). |
| define void @basic_strided_store(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'basic_strided_store' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%iv> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%idx> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %iv |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %idx |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Index used only by the store address: the induction variable times %stride. |
| %idx = mul i64 %iv, %stride |
| |
| ; The load reads element %iv of %p. |
| %gep.ld = getelementptr i64, ptr %p, i64 %iv |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The store writes the loaded value to element %idx of %p.out. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %idx |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Branch back to %header while %iv.next is (signed) less than 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; This test shows how/if we scalarize the address computation def-chain if that |
| ; pointer has other non-scalar uses. |
| ; In this loop %gep.ld is used both as the address of the load and as the |
| ; value stored through %gep.ptr.st. |
| define void @ptr_vec_use(ptr noalias %p.out, ptr noalias %p.ptr.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'ptr_vec_use' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%ld>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%gep.ptr.st> = getelementptr ir<%p.ptr.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%gep.ld>, ir<%gep.ptr.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %ld, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv |
| ; CHECK-NEXT: IR store ptr %gep.ld, ptr %gep.ptr.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Index for the load address: the induction variable times %stride. |
| %idx = mul i64 %iv, %stride |
| |
| ; First use of %gep.ld: the address of the load. |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The loaded value goes to element %iv of %p.out. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Second use of %gep.ld: the pointer itself is stored to element %iv of |
| ; %p.ptr.out. This store carries no explicit alignment; the scalar-loop dump |
| ; in the assertions above prints it as `align 8`. |
| %gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv |
| store ptr %gep.ld, ptr %gep.ptr.st |
| |
| ; Branch back to %header while %iv.next is (signed) less than 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Similar to above, but it's not the resulting pointer itself that has a |
| ; non-scalar use but something in the middle of its def-chain. |
| ; Here %idx (= %iv * %stride) feeds both the load address %gep.ld and the |
| ; multiply that produces the stored value %val. |
| define void @stride_idx_vec_use(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'stride_idx_vec_use' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%idx> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%val> = mul ir<%ld>, ir<%idx> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %idx = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %val = mul i64 %ld, %idx |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; %idx has two users below: the load address and the %val multiply. |
| %idx = mul i64 %iv, %stride |
| |
| ; Use 1 of %idx: address computation for the load. |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; Use 2 of %idx: plain arithmetic on the loaded value. |
| %val = mul i64 %ld, %idx |
| |
| ; The product goes to element %iv of %p.out. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Branch back to %header while %iv.next is (signed) less than 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Another variation of the above, with an even longer def-chain: |
| ; %idx = (%iv * %stride) + 42 feeds both the load address %gep.ld and the |
| ; multiply that produces the stored value %val. |
| ; NOTE(review): the final assertion of this function (the "loop not |
| ; vectorized" remark) presumably matches output produced for the function that |
| ; follows, @base_ptr_induction_vec_use, which has no VPlan dump of its own - |
| ; confirm before reordering or removing either function. |
| define void @offset_stride_idx_vec_use(ptr noalias %p.out, ptr %p, i64 %stride) { |
| ; CHECK-LABEL: VPlan for loop in 'offset_stride_idx_vec_use' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: Live-in ir<128> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nsw ir<0>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%iv.next> = add nsw ir<%iv>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%iv.times.stride> = mul ir<%iv>, ir<%stride> |
| ; CHECK-NEXT: EMIT ir<%idx> = add ir<%iv.times.stride>, ir<42> |
| ; CHECK-NEXT: EMIT ir<%gep.ld> = getelementptr ir<%p>, ir<%idx> |
| ; CHECK-NEXT: EMIT-SCALAR ir<%ld> = load ir<%gep.ld> |
| ; CHECK-NEXT: EMIT ir<%val> = mul ir<%ld>, ir<%idx> |
| ; CHECK-NEXT: EMIT ir<%gep.st> = getelementptr ir<%p.out>, ir<%iv> |
| ; CHECK-NEXT: EMIT store ir<%val>, ir<%gep.st> |
| ; CHECK-NEXT: EMIT ir<%exitcond> = icmp sge ir<%iv.next>, ir<128> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = not ir<%exitcond> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = exiting-iv-value ir<%iv> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<128>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP6]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<header> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<header>: |
| ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.next = add nsw i64 %iv, 1 |
| ; CHECK-NEXT: IR %iv.times.stride = mul i64 %iv, %stride |
| ; CHECK-NEXT: IR %idx = add i64 %iv.times.stride, 42 |
| ; CHECK-NEXT: IR %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| ; CHECK-NEXT: IR %ld = load i64, ptr %gep.ld, align 8 |
| ; CHECK-NEXT: IR %val = mul i64 %ld, %idx |
| ; CHECK-NEXT: IR %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| ; CHECK-NEXT: IR store i64 %val, ptr %gep.st, align 8 |
| ; CHECK-NEXT: IR %exitcond = icmp slt i64 %iv.next, 128 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: remark: <unknown>:0:0: loop not vectorized: value that could not be identified as reduction is used outside the loop |
| ; |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| |
| ; Two-step index computation: scale by %stride, then add the constant 42. |
| %iv.times.stride = mul i64 %iv, %stride |
| %idx = add i64 %iv.times.stride, 42 |
| |
| ; Use 1 of %idx: address computation for the load. |
| %gep.ld = getelementptr i64, ptr %p, i64 %idx |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; Use 2 of %idx: plain arithmetic on the loaded value. |
| %val = mul i64 %ld, %idx |
| |
| ; The product goes to element %iv of %p.out. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %val, ptr %gep.st, align 8 |
| |
| ; Branch back to %header while %iv.next is (signed) less than 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; No VPlan dump because the `%gep.ld` phi doesn't pass legality currently. |
| ; In this variant the load address is a pointer phi that starts at %p and is |
| ; advanced by %stride i64 elements each iteration; the same pointer is also |
| ; stored through %gep.ptr.st. |
| ; NOTE(review): the "loop not vectorized" remark asserted at the end of the |
| ; preceding function (@offset_stride_idx_vec_use) appears to be the output |
| ; this loop produces - confirm. |
| define void @base_ptr_induction_vec_use(ptr noalias %p.out, ptr noalias %p.ptr.out, ptr %p, i64 %stride) { |
| entry: |
| br label %header |
| |
| header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ] |
| ; Pointer recurrence: %p on entry, %gep.ld.next on the back edge. |
| %gep.ld = phi ptr [ %p, %entry ], [ %gep.ld.next, %header ] |
| %iv.next = add nsw i64 %iv, 1 |
| %gep.ld.next = getelementptr inbounds i64, ptr %gep.ld, i64 %stride |
| |
| ; First use of %gep.ld: the address of the load. |
| %ld = load i64, ptr %gep.ld, align 8 |
| |
| ; The loaded value goes to element %iv of %p.out. |
| %gep.st = getelementptr i64, ptr %p.out, i64 %iv |
| store i64 %ld, ptr %gep.st, align 8 |
| |
| ; Second use of %gep.ld: the pointer itself is stored to element %iv of |
| ; %p.ptr.out (no explicit alignment on this store). |
| %gep.ptr.st = getelementptr ptr, ptr %p.ptr.out, i64 %iv |
| store ptr %gep.ld, ptr %gep.ptr.st |
| |
| ; Branch back to %header while %iv.next is (signed) less than 128. |
| %exitcond = icmp slt i64 %iv.next, 128 |
| br i1 %exitcond, label %header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop with two integer inductions: |
| ; - %iv.0 starts at %start.ext (zext of %start) and steps by 1; the loop |
| ; exits when %iv.0.next equals 100. |
| ; - %iv.1 starts at 0 and steps by %start.ext; it indexes the float store |
| ; into %dst. |
| ; In the expected plan below, the original trip count and the %iv.1 step are |
| ; both printed as EXPAND SCEV recipes in ir-bb<entry>, while the %iv.0 start |
| ; is printed as ir<%start.ext>. |
| define void @test_rewrite_iv_scevs(i32 %start, ptr %dst) { |
| ; CHECK-LABEL: VPlan for loop in 'test_rewrite_iv_scevs' |
| ; CHECK: VPlan ' for UF>=1' { |
| ; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF |
| ; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF |
| ; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count |
| ; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = original trip-count |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<entry>: |
| ; CHECK-NEXT: IR %start.ext = zext i32 %start to i64 |
| ; CHECK-NEXT: EMIT vp<[[VP3]]> = EXPAND SCEV (100 + (-1 * (zext i32 %start to i64))<nsw>)<nsw> |
| ; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = EXPAND SCEV (zext i32 %start to i64) |
| ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: vector.ph: |
| ; CHECK-NEXT: Successor(s): vector loop |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: <x1> vector loop: { |
| ; CHECK-NEXT: vector.body: |
| ; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> |
| ; CHECK-NEXT: ir<%iv.0> = WIDEN-INDUCTION ir<%start.ext>, ir<1>, vp<[[VP0]]> |
| ; CHECK-NEXT: ir<%iv.1> = WIDEN-INDUCTION ir<0>, vp<[[VP4]]>, vp<[[VP0]]> |
| ; CHECK-NEXT: EMIT ir<%gep.dst> = getelementptr ir<%dst>, ir<%iv.1> |
| ; CHECK-NEXT: EMIT store ir<0.000000e+00>, ir<%gep.dst> |
| ; CHECK-NEXT: EMIT ir<%iv.1.next> = add ir<%iv.1>, vp<[[VP4]]> |
| ; CHECK-NEXT: EMIT ir<%iv.0.next> = add ir<%iv.0>, ir<1> |
| ; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.0.next>, ir<100> |
| ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP5]]>, vp<[[VP1]]> |
| ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2]]> |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: Successor(s): middle.block |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: middle.block: |
| ; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = exiting-iv-value ir<%iv.0> |
| ; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = exiting-iv-value ir<%iv.1> |
| ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<[[VP3]]>, vp<[[VP2]]> |
| ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<exit>: |
| ; CHECK-NEXT: No successors |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: scalar.ph: |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP7]]>, middle.block ], [ ir<%start.ext>, ir-bb<entry> ] |
| ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val>.1 = phi [ vp<[[VP8]]>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; CHECK-NEXT: Successor(s): ir-bb<loop> |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: ir-bb<loop>: |
| ; CHECK-NEXT: IR %iv.0 = phi i64 [ %start.ext, %entry ], [ %iv.0.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph) |
| ; CHECK-NEXT: IR %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] (extra operand: vp<%bc.resume.val>.1 from scalar.ph) |
| ; CHECK-NEXT: IR %gep.dst = getelementptr float, ptr %dst, i64 %iv.1 |
| ; CHECK-NEXT: IR store float 0.000000e+00, ptr %gep.dst, align 4 |
| ; CHECK-NEXT: IR %iv.1.next = add i64 %iv.1, %start.ext |
| ; CHECK-NEXT: IR %iv.0.next = add i64 %iv.0, 1 |
| ; CHECK-NEXT: IR %ec = icmp eq i64 %iv.0.next, 100 |
| ; CHECK-NEXT: No successors |
| ; CHECK-NEXT: } |
| ; |
| entry: |
| ; Zero-extended %start: the start value of %iv.0 and the step of %iv.1. |
| %start.ext = zext i32 %start to i64 |
| br label %loop |
| |
| loop: |
| %iv.0 = phi i64 [ %start.ext, %entry ], [ %iv.0.next, %loop ] |
| %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] |
| ; Store 0.0 to element %iv.1 of %dst. |
| %gep.dst = getelementptr float, ptr %dst, i64 %iv.1 |
| store float 0.0, ptr %gep.dst, align 4 |
| ; Advance %iv.1 by the loop-invariant %start.ext and %iv.0 by 1. |
| %iv.1.next = add i64 %iv.1, %start.ext |
| %iv.0.next = add i64 %iv.0, 1 |
| ; Leave the loop once %iv.0.next equals 100. |
| %ec = icmp eq i64 %iv.0.next, 100 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Keep this in sync with the same under LoopVectorize/ |