| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=loop-vectorize -vplan-print-after="printFinalVPlan$" \ |
| ; RUN: -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-masked-memory-ops -disable-output 2>&1 < %s | FileCheck %s --check-prefix=IC2 |
| ; RUN: opt -passes=loop-vectorize -vplan-print-after="printFinalVPlan$" \ |
| ; RUN: -force-vector-width=4 -force-vector-interleave=2 -force-target-supports-masked-memory-ops -prefer-predicate-over-epilogue=predicate-dont-vectorize \ |
| ; RUN: -disable-output 2>&1 < %s | FileCheck %s --check-prefix=IC2-TF |
| |
| ; This function is derived from the following C program (the IR below uses an |
| ; i64 trip count and induction variable instead of int): |
| ; int find_last_int_select(int N, int *data, int a) { |
| ;   int t = -1; |
| ;   for (int i = 0; i < N; i++) { |
| ;     if (a < data[i]) |
| ;       t = data[i]; |
| ;   } |
| ;   return t; |
| ; } |
| ; The IC2 checks below cover the final VPlan printed by the first RUN line |
| ; (forced vector width 4, forced interleave count 2); the IC2-TF checks cover |
| ; the second RUN line, which additionally passes |
| ; -prefer-predicate-over-epilogue=predicate-dont-vectorize. |
| define i32 @find_last_int_select(i64 %N, ptr %data, i32 %a) { |
| ; IC2-LABEL: VPlan for loop in 'find_last_int_select' |
| ; IC2: VPlan 'Final VPlan for VF={4},UF={2}' { |
| ; IC2-NEXT: Live-in ir<%N> = original trip-count |
| ; IC2-EMPTY: |
| ; IC2-NEXT: ir-bb<entry>: |
| ; IC2-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%N>, ir<8> |
| ; IC2-NEXT: EMIT branch-on-cond vp<%min.iters.check> |
| ; IC2-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph |
| ; IC2-EMPTY: |
| ; IC2-NEXT: vector.ph: |
| ; IC2-NEXT: EMIT vp<%n.mod.vf> = urem ir<%N>, ir<8> |
| ; IC2-NEXT: EMIT vp<%n.vec> = sub ir<%N>, vp<%n.mod.vf> |
| ; IC2-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = broadcast ir<%a> |
| ; IC2-NEXT: Successor(s): vector.body |
| ; IC2-EMPTY: |
| ; IC2-NEXT: vector.body: |
| ; IC2-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] |
| ; IC2-NEXT: WIDEN-REDUCTION-PHI ir<%data.phi> = phi ir<-1>, vp<[[VP10:%[0-9]+]]> |
| ; IC2-NEXT: WIDEN-REDUCTION-PHI ir<%data.phi>.1 = phi ir<-1>, vp<[[VP11:%[0-9]+]]> |
| ; IC2-NEXT: WIDEN-PHI vp<[[VP4:%[0-9]+]]> = phi [ ir<false>, vector.ph ], [ vp<[[VP8:%[0-9]+]]>, vector.body ] |
| ; IC2-NEXT: WIDEN-PHI vp<[[VP5:%[0-9]+]]> = phi [ ir<false>, vector.ph ], [ vp<[[VP9:%[0-9]+]]>, vector.body ] |
| ; IC2-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%data>, vp<%index> |
| ; IC2-NEXT: vp<[[VP6:%[0-9]+]]> = vector-pointer inbounds ir<%ld.addr>, ir<4> |
| ; IC2-NEXT: WIDEN ir<%ld> = load ir<%ld.addr> |
| ; IC2-NEXT: WIDEN ir<%ld>.1 = load vp<[[VP6]]> |
| ; IC2-NEXT: WIDEN ir<%select.cmp> = icmp slt vp<[[VP3]]>, ir<%ld> |
| ; IC2-NEXT: WIDEN ir<%select.cmp>.1 = icmp slt vp<[[VP3]]>, ir<%ld>.1 |
| ; IC2-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = any-of ir<%select.cmp>, ir<%select.cmp>.1 |
| ; IC2-NEXT: EMIT vp<[[VP8]]> = select vp<[[VP7]]>, ir<%select.cmp>, vp<[[VP4]]> |
| ; IC2-NEXT: EMIT vp<[[VP9]]> = select vp<[[VP7]]>, ir<%select.cmp>.1, vp<[[VP5]]> |
| ; IC2-NEXT: EMIT vp<[[VP10]]> = select vp<[[VP7]]>, ir<%ld>, ir<%data.phi> |
| ; IC2-NEXT: EMIT vp<[[VP11]]> = select vp<[[VP7]]>, ir<%ld>.1, ir<%data.phi>.1 |
| ; IC2-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<8> |
| ; IC2-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = icmp eq vp<%index.next>, vp<%n.vec> |
| ; IC2-NEXT: EMIT branch-on-cond vp<[[VP12]]> |
| ; IC2-NEXT: Successor(s): middle.block, vector.body |
| ; IC2-EMPTY: |
| ; IC2-NEXT: middle.block: |
| ; IC2-NEXT: EMIT vp<[[VP14:%[0-9]+]]> = extract-last-active ir<-1>, vp<[[VP10]]>, vp<[[VP8]]>, vp<[[VP11]]>, vp<[[VP9]]> |
| ; IC2-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%N>, vp<%n.vec> |
| ; IC2-NEXT: EMIT branch-on-cond vp<%cmp.n> |
| ; IC2-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph> |
| ; IC2-EMPTY: |
| ; IC2-NEXT: ir-bb<exit>: |
| ; IC2-NEXT: IR %select.data.lcssa = phi i32 [ %select.data, %loop ] (extra operand: vp<[[VP14]]> from middle.block) |
| ; IC2-NEXT: No successors |
| ; IC2-EMPTY: |
| ; IC2-NEXT: ir-bb<scalar.ph>: |
| ; IC2-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%n.vec>, middle.block ], [ ir<0>, ir-bb<entry> ] |
| ; IC2-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[VP14]]>, middle.block ], [ ir<-1>, ir-bb<entry> ] |
| ; IC2-NEXT: Successor(s): ir-bb<loop> |
| ; IC2-EMPTY: |
| ; IC2-NEXT: ir-bb<loop>: |
| ; IC2-NEXT: IR %iv = phi i64 [ 0, %scalar.ph ], [ %iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>) |
| ; IC2-NEXT: IR %data.phi = phi i32 [ -1, %scalar.ph ], [ %select.data, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>) |
| ; IC2-NEXT: IR %ld.addr = getelementptr inbounds i32, ptr %data, i64 %iv |
| ; IC2-NEXT: IR %ld = load i32, ptr %ld.addr, align 4 |
| ; IC2-NEXT: IR %select.cmp = icmp slt i32 %a, %ld |
| ; IC2-NEXT: IR %select.data = select i1 %select.cmp, i32 %ld, i32 %data.phi |
| ; IC2-NEXT: IR %iv.next = add nuw nsw i64 %iv, 1 |
| ; IC2-NEXT: IR %exit.cmp = icmp eq i64 %iv.next, %N |
| ; IC2-NEXT: No successors |
| ; IC2-NEXT: } |
| ; |
| ; IC2-TF-LABEL: VPlan for loop in 'find_last_int_select' |
| ; IC2-TF: VPlan 'Final VPlan for VF={4},UF={2}' { |
| ; IC2-TF-NEXT: Live-in ir<%N> = original trip-count |
| ; IC2-TF-EMPTY: |
| ; IC2-TF-NEXT: ir-bb<entry>: |
| ; IC2-TF-NEXT: Successor(s): vector.ph |
| ; IC2-TF-EMPTY: |
| ; IC2-TF-NEXT: vector.ph: |
| ; IC2-TF-NEXT: EMIT vp<%n.rnd.up> = add ir<%N>, ir<7> |
| ; IC2-TF-NEXT: EMIT vp<%n.mod.vf> = urem vp<%n.rnd.up>, ir<8> |
| ; IC2-TF-NEXT: EMIT vp<%n.vec> = sub vp<%n.rnd.up>, vp<%n.mod.vf> |
| ; IC2-TF-NEXT: EMIT vp<%trip.count.minus.1> = sub ir<%N>, ir<1> |
| ; IC2-TF-NEXT: EMIT vp<[[VP2:%[0-9]+]]> = broadcast vp<%trip.count.minus.1> |
| ; IC2-TF-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = broadcast ir<%a> |
| ; IC2-TF-NEXT: Successor(s): vector.body |
| ; IC2-TF-EMPTY: |
| ; IC2-TF-NEXT: vector.body: |
| ; IC2-TF-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ] |
| ; IC2-TF-NEXT: WIDEN-REDUCTION-PHI ir<%data.phi> = phi ir<-1>, vp<[[VP16:%[0-9]+]]> |
| ; IC2-TF-NEXT: WIDEN-REDUCTION-PHI ir<%data.phi>.1 = phi ir<-1>, vp<[[VP17:%[0-9]+]]> |
| ; IC2-TF-NEXT: WIDEN-PHI vp<[[VP4:%[0-9]+]]> = phi [ ir<false>, vector.ph ], [ vp<[[VP14:%[0-9]+]]>, vector.body ] |
| ; IC2-TF-NEXT: WIDEN-PHI vp<[[VP5:%[0-9]+]]> = phi [ ir<false>, vector.ph ], [ vp<[[VP15:%[0-9]+]]>, vector.body ] |
| ; IC2-TF-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION vp<%index> |
| ; IC2-TF-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION vp<%index>, ir<1> |
| ; IC2-TF-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = icmp ule vp<[[VP6]]>, vp<[[VP2]]> |
| ; IC2-TF-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = icmp ule vp<[[VP7]]>, vp<[[VP2]]> |
| ; IC2-TF-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%data>, vp<%index> |
| ; IC2-TF-NEXT: vp<[[VP10:%[0-9]+]]> = vector-pointer inbounds ir<%ld.addr>, ir<4> |
| ; IC2-TF-NEXT: WIDEN ir<%ld> = load ir<%ld.addr>, vp<[[VP8]]> |
| ; IC2-TF-NEXT: WIDEN ir<%ld>.1 = load vp<[[VP10]]>, vp<[[VP9]]> |
| ; IC2-TF-NEXT: WIDEN ir<%select.cmp> = icmp slt vp<[[VP3]]>, ir<%ld> |
| ; IC2-TF-NEXT: WIDEN ir<%select.cmp>.1 = icmp slt vp<[[VP3]]>, ir<%ld>.1 |
| ; IC2-TF-NEXT: EMIT vp<[[VP11:%[0-9]+]]> = logical-and vp<[[VP8]]>, ir<%select.cmp> |
| ; IC2-TF-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = logical-and vp<[[VP9]]>, ir<%select.cmp>.1 |
| ; IC2-TF-NEXT: EMIT vp<[[VP13:%[0-9]+]]> = any-of vp<[[VP11]]>, vp<[[VP12]]> |
| ; IC2-TF-NEXT: EMIT vp<[[VP14]]> = select vp<[[VP13]]>, vp<[[VP11]]>, vp<[[VP4]]> |
| ; IC2-TF-NEXT: EMIT vp<[[VP15]]> = select vp<[[VP13]]>, vp<[[VP12]]>, vp<[[VP5]]> |
| ; IC2-TF-NEXT: EMIT vp<[[VP16]]> = select vp<[[VP13]]>, ir<%ld>, ir<%data.phi> |
| ; IC2-TF-NEXT: EMIT vp<[[VP17]]> = select vp<[[VP13]]>, ir<%ld>.1, ir<%data.phi>.1 |
| ; IC2-TF-NEXT: EMIT vp<%index.next> = add vp<%index>, ir<8> |
| ; IC2-TF-NEXT: EMIT vp<[[VP18:%[0-9]+]]> = icmp eq vp<%index.next>, vp<%n.vec> |
| ; IC2-TF-NEXT: EMIT branch-on-cond vp<[[VP18]]> |
| ; IC2-TF-NEXT: Successor(s): middle.block, vector.body |
| ; IC2-TF-EMPTY: |
| ; IC2-TF-NEXT: middle.block: |
| ; IC2-TF-NEXT: EMIT vp<[[VP20:%[0-9]+]]> = extract-last-active ir<-1>, vp<[[VP16]]>, vp<[[VP14]]>, vp<[[VP17]]>, vp<[[VP15]]> |
| ; IC2-TF-NEXT: Successor(s): ir-bb<exit> |
| ; IC2-TF-EMPTY: |
| ; IC2-TF-NEXT: ir-bb<exit>: |
| ; IC2-TF-NEXT: IR %select.data.lcssa = phi i32 [ %select.data, %loop ] (extra operand: vp<[[VP20]]> from middle.block) |
| ; IC2-TF-NEXT: No successors |
| ; IC2-TF-NEXT: } |
| ; |
| ; Scalar input loop: walks %data from index 0 and returns the last loaded |
| ; element that is greater than %a (signed compare), or -1 if no element is. |
entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; %data.phi carries the running result across iterations; it starts at -1. |
| %data.phi = phi i32 [ -1, %entry ], [ %select.data, %loop ] |
| %ld.addr = getelementptr inbounds i32, ptr %data, i64 %iv |
| %ld = load i32, ptr %ld.addr, align 4 |
| ; Take the loaded value when %a < %ld (signed); otherwise keep the previous |
| ; result. |
| %select.cmp = icmp slt i32 %a, %ld |
| %select.data = select i1 %select.cmp, i32 %ld, i32 %data.phi |
| %iv.next = add nuw nsw i64 %iv, 1 |
| ; Leave the loop once the incremented index equals %N. |
| %exit.cmp = icmp eq i64 %iv.next, %N |
| br i1 %exit.cmp, label %exit, label %loop |
| |
| exit: |
| ret i32 %select.data |
| } |