| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s |
| |
| ; Early-exit loop over fixed-width <2 x ptr> vectors. Each iteration loads two |
| ; pointers from %src, compares them against null and OR-reduces the <2 x i1> |
| ; result. The reduced bit both terminates the loop and is reused after it by a |
| ; select. The assertions below expect one cmeq/umaxv/fmov sequence inside the |
| ; loop, with the exit block only masking bit 0 of that already-computed value. |
| define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) { |
| ; CHECK-LABEL: select_or_reduce_v2i1: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB0_1: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: cmeq v0.2d, v0.2d, #0 |
| ; CHECK-NEXT: umaxv s0, v0.4s |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: tbnz w9, #0, .LBB0_3 |
| ; CHECK-NEXT: // %bb.2: // %vector.body |
| ; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 |
| ; CHECK-NEXT: cmp x8, #16 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: b.ne .LBB0_1 |
| ; CHECK-NEXT: .LBB0_3: // %middle.split |
| ; CHECK-NEXT: and x0, x9, #0x1 |
| ; CHECK-NEXT: ret |
| entry: |
| br label %vector.body |
| |
| vector.body: |
| %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index |
| %wide.load = load <2 x ptr>, ptr %arrayidx, align 8 |
| ; Lane-wise null test on the two loaded pointers. |
| %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer) |
| %index.next = add nuw i64 %index, 2 |
| ; True if at least one of the two lanes compared equal to null. |
| %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond) |
| ; Leave the loop on the first null pointer or once %index.next reaches 4. |
| %iv.cmp = icmp eq i64 %index.next, 4 |
| %exit.cond = or i1 %or.reduc, %iv.cmp |
| br i1 %exit.cond, label %middle.split, label %vector.body |
| |
| middle.split: |
| ; Second use of the reduction, outside the loop: 1 if a null was seen, else 0. |
| %sel = select i1 %or.reduc, i64 1, i64 0 |
| ret i64 %sel |
| } |
| |
| ; Same early-exit loop as the fixed-width select test, but the OR-reduction is |
| ; reused after the loop by a conditional branch instead of a select. On the |
| ; %found path the function stores 56 through %p and returns 1; otherwise it |
| ; returns 0. The assertions below expect the exit block to test bit 0 of the |
| ; value already produced in the loop (tbz) rather than recomputing it. |
| ; |
| ; %p is marked writeonly because the only access through it is the store in |
| ; %found; a readnone marking would contradict that store. |
| define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef writeonly %p) { |
| ; CHECK-LABEL: br_or_reduce_v2i1: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: .LBB1_1: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: cmeq v0.2d, v0.2d, #0 |
| ; CHECK-NEXT: umaxv s0, v0.4s |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: tbnz w9, #0, .LBB1_3 |
| ; CHECK-NEXT: // %bb.2: // %vector.body |
| ; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1 |
| ; CHECK-NEXT: cmp x8, #16 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: b.ne .LBB1_1 |
| ; CHECK-NEXT: .LBB1_3: // %middle.split |
| ; CHECK-NEXT: tbz w9, #0, .LBB1_5 |
| ; CHECK-NEXT: // %bb.4: // %found |
| ; CHECK-NEXT: mov w8, #56 // =0x38 |
| ; CHECK-NEXT: mov w0, #1 // =0x1 |
| ; CHECK-NEXT: str x8, [x1] |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_5: |
| ; CHECK-NEXT: mov x0, xzr |
| ; CHECK-NEXT: ret |
| entry: |
| br label %vector.body |
| |
| vector.body: |
| %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index |
| %wide.load = load <2 x ptr>, ptr %arrayidx, align 8 |
| ; Lane-wise null test on the two loaded pointers. |
| %cond = icmp eq <2 x ptr> %wide.load, splat(ptr zeroinitializer) |
| %index.next = add nuw i64 %index, 2 |
| ; True if at least one of the two lanes compared equal to null. |
| %or.reduc = tail call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cond) |
| ; Leave the loop on the first null pointer or once %index.next reaches 4. |
| %iv.cmp = icmp eq i64 %index.next, 4 |
| %exit.cond = or i1 %or.reduc, %iv.cmp |
| br i1 %exit.cond, label %middle.split, label %vector.body |
| |
| middle.split: |
| ; Second use of the reduction, outside the loop, this time as a branch condition. |
| br i1 %or.reduc, label %found, label %notfound |
| |
| found: |
| store i64 56, ptr %p, align 8 |
| ret i64 1 |
| |
| notfound: |
| ret i64 0 |
| } |
| |
| ; Scalable-vector form of the select test: each iteration loads a |
| ; <vscale x 2 x ptr> vector from %src, compares it against null and OR-reduces |
| ; the predicate. The index advances by %vf = vscale * 2 per iteration. The |
| ; assertions below expect the loop to branch (b.ne) directly on the flags set |
| ; by cmpeq, and the exit block to re-derive the result with ptest + cset. |
| define i64 @select_or_reduce_nxv2i1(ptr nocapture noundef readonly %src) { |
| ; CHECK-LABEL: select_or_reduce_nxv2i1: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: cntd x8 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: mov x9, xzr |
| ; CHECK-NEXT: neg x10, x8 |
| ; CHECK-NEXT: add x10, x10, #4 |
| ; CHECK-NEXT: .LBB2_1: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3] |
| ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: b.ne .LBB2_3 |
| ; CHECK-NEXT: // %bb.2: // %vector.body |
| ; CHECK-NEXT: // in Loop: Header=BB2_1 Depth=1 |
| ; CHECK-NEXT: cmp x10, x9 |
| ; CHECK-NEXT: add x9, x9, x8 |
| ; CHECK-NEXT: b.ne .LBB2_1 |
| ; CHECK-NEXT: .LBB2_3: // %middle.split |
| ; CHECK-NEXT: ptest p0, p1.b |
| ; CHECK-NEXT: cset w0, ne |
| ; CHECK-NEXT: ret |
| entry: |
| %vscale = tail call i64 @llvm.vscale.i64() |
| ; Per-iteration step: number of elements in a <vscale x 2 x ...> vector. |
| %vf = shl nuw nsw i64 %vscale, 1 |
| br label %vector.body |
| |
| vector.body: |
| %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index |
| %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8 |
| ; Lane-wise null test on the loaded pointers. |
| %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer) |
| %index.next = add nuw i64 %index, %vf |
| ; True if at least one lane compared equal to null. |
| %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond) |
| ; Leave the loop on the first null pointer or once %index.next equals 4. |
| %iv.cmp = icmp eq i64 %index.next, 4 |
| %exit.cond = or i1 %or.reduc, %iv.cmp |
| br i1 %exit.cond, label %middle.split, label %vector.body |
| |
| middle.split: |
| ; Second use of the reduction, outside the loop: 1 if a null was seen, else 0. |
| %sel = select i1 %or.reduc, i64 1, i64 0 |
| ret i64 %sel |
| } |
| |
| ; Scalable-vector form of the branch test: each iteration loads a |
| ; <vscale x 2 x ptr> vector from %src, compares it against null and OR-reduces |
| ; the predicate; the index advances by %vf = vscale * 2. After the loop the |
| ; reduction drives a conditional branch: %found stores 56 through %p and |
| ; returns 1, %notfound returns 0. The assertions below expect the loop to |
| ; branch (b.ne) directly on the flags set by cmpeq, and the exit block to use |
| ; ptest + b.eq. |
| ; |
| ; %p is marked writeonly because the only access through it is the store in |
| ; %found; a readnone marking would contradict that store. |
| define i64 @br_or_reduce_nxv2i1(ptr nocapture noundef readonly %src, ptr noundef writeonly %p) { |
| ; CHECK-LABEL: br_or_reduce_nxv2i1: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: cntd x8 |
| ; CHECK-NEXT: ptrue p0.d |
| ; CHECK-NEXT: mov x9, xzr |
| ; CHECK-NEXT: neg x10, x8 |
| ; CHECK-NEXT: add x10, x10, #4 |
| ; CHECK-NEXT: .LBB3_1: // %vector.body |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3] |
| ; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-NEXT: b.ne .LBB3_3 |
| ; CHECK-NEXT: // %bb.2: // %vector.body |
| ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 |
| ; CHECK-NEXT: cmp x10, x9 |
| ; CHECK-NEXT: add x9, x9, x8 |
| ; CHECK-NEXT: b.ne .LBB3_1 |
| ; CHECK-NEXT: .LBB3_3: // %middle.split |
| ; CHECK-NEXT: ptest p0, p1.b |
| ; CHECK-NEXT: b.eq .LBB3_5 |
| ; CHECK-NEXT: // %bb.4: // %found |
| ; CHECK-NEXT: mov w8, #56 // =0x38 |
| ; CHECK-NEXT: mov w0, #1 // =0x1 |
| ; CHECK-NEXT: str x8, [x1] |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB3_5: |
| ; CHECK-NEXT: mov x0, xzr |
| ; CHECK-NEXT: ret |
| entry: |
| %vscale = tail call i64 @llvm.vscale.i64() |
| ; Per-iteration step: number of elements in a <vscale x 2 x ...> vector. |
| %vf = shl nuw nsw i64 %vscale, 1 |
| br label %vector.body |
| |
| vector.body: |
| %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] |
| %arrayidx = getelementptr inbounds ptr, ptr %src, i64 %index |
| %wide.load = load <vscale x 2 x ptr>, ptr %arrayidx, align 8 |
| ; Lane-wise null test on the loaded pointers. |
| %cond = icmp eq <vscale x 2 x ptr> %wide.load, splat(ptr zeroinitializer) |
| %index.next = add nuw i64 %index, %vf |
| ; True if at least one lane compared equal to null. |
| %or.reduc = tail call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %cond) |
| ; Leave the loop on the first null pointer or once %index.next equals 4. |
| %iv.cmp = icmp eq i64 %index.next, 4 |
| %exit.cond = or i1 %or.reduc, %iv.cmp |
| br i1 %exit.cond, label %middle.split, label %vector.body |
| |
| middle.split: |
| ; Second use of the reduction, outside the loop, this time as a branch condition. |
| br i1 %or.reduc, label %found, label %notfound |
| |
| found: |
| store i64 56, ptr %p, align 8 |
| ret i64 1 |
| |
| notfound: |
| ret i64 0 |
| } |
| |
| ; OR-reduction intrinsics called by the tests in this file: one overload for the |
| ; fixed-width <2 x i1> mask and one for the scalable <vscale x 2 x i1> mask. |
| declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>) |
| declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>) |