| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 |
| ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
| ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
| ; RUN: llc -aarch64-sve-vector-bits-min=256 -mattr=sve2p2 < %s | FileCheck %s -check-prefixes=CHECK-EXPAND |
| |
| target triple = "aarch64-unknown-linux-gnu" |
| |
| ; |
| ; Masked Expand Loads (llvm.masked.expandload) |
| ; |
| |
| ; Expand-load of a <2 x half> vector from %ap. The lane mask is the result of |
| ; an ordered-equal compare of the vectors loaded from %ap and %bp, and the |
| ; passthru operand is zeroinitializer. |
| ; Expected code under the CHECK prefix tests the mask bits one at a time and |
| ; loads each enabled lane with a lane-wise ld1. Under the CHECK-EXPAND prefix |
| ; (the RUN line with -mattr=sve2p2) a cntp/whilelo pair builds the predicate for |
| ; a single ld1h whose result is passed to an expand instruction. |
| define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 { |
| ; CHECK-LABEL: masked_load_v2f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr s0, [x0] |
| ; CHECK-NEXT: ldr s1, [x1] |
| ; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h |
| ; CHECK-NEXT: index z1.s, #1, #1 |
| ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 |
| ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b |
| ; CHECK-NEXT: addp v1.2s, v0.2s, v0.2s |
| ; CHECK-NEXT: movi d0, #0000000000000000 |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: tbnz w8, #0, .LBB0_3 |
| ; CHECK-NEXT: // %bb.1: // %else |
| ; CHECK-NEXT: tbnz w8, #1, .LBB0_4 |
| ; CHECK-NEXT: .LBB0_2: // %else2 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB0_3: // %cond.load |
| ; CHECK-NEXT: movi d0, #0000000000000000 |
| ; CHECK-NEXT: ld1 { v0.h }[0], [x0], #2 |
| ; CHECK-NEXT: tbz w8, #1, .LBB0_2 |
| ; CHECK-NEXT: .LBB0_4: // %cond.load1 |
| ; CHECK-NEXT: ld1 { v0.h }[1], [x0] |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v2f16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ldr s1, [x0] |
| ; CHECK-EXPAND-NEXT: ldr s2, [x1] |
| ; CHECK-EXPAND-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl4 |
| ; CHECK-EXPAND-NEXT: fcmeq v1.4h, v1.4h, v2.4h |
| ; CHECK-EXPAND-NEXT: sshll v1.4s, v1.4h, #0 |
| ; CHECK-EXPAND-NEXT: mov v0.h[0], v1.h[0] |
| ; CHECK-EXPAND-NEXT: mov w8, v1.s[1] |
| ; CHECK-EXPAND-NEXT: mov v0.h[1], w8 |
| ; CHECK-EXPAND-NEXT: cmpne p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h |
| ; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h |
| ; CHECK-EXPAND-NEXT: // kill: def $d0 killed $d0 killed $z0 |
| ; CHECK-EXPAND-NEXT: ret |
| ; Note that %ap is both the first compare operand and the expand-load source. |
| %a = load <2 x half>, ptr %ap |
| %b = load <2 x half>, ptr %bp |
| %mask = fcmp oeq <2 x half> %a, %b |
| %load = call <2 x half> @llvm.masked.expandload.v2f16(ptr %ap, <2 x i1> %mask, <2 x half> zeroinitializer) |
| ret <2 x half> %load |
| } |
| |
| ; Expand-load of a <2 x float> vector from %ap. The lane mask is the result of |
| ; an ordered-equal compare of the vectors loaded from %ap and %bp, and the |
| ; passthru operand is zeroinitializer. |
| ; Expected code under the CHECK prefix branches on each mask bit and loads the |
| ; enabled lanes one by one with lane-wise ld1. Under the CHECK-EXPAND prefix |
| ; (the RUN line with -mattr=sve2p2) the expectation is a cntp/whilelo pair, one |
| ; predicated ld1w and an expand instruction, with no branches. |
| define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 { |
| ; CHECK-LABEL: masked_load_v2f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr d1, [x0] |
| ; CHECK-NEXT: ldr d2, [x1] |
| ; CHECK-NEXT: index z0.s, #1, #1 |
| ; CHECK-NEXT: fcmeq v1.2s, v1.2s, v2.2s |
| ; CHECK-NEXT: and v0.8b, v1.8b, v0.8b |
| ; CHECK-NEXT: addp v1.2s, v0.2s, v0.2s |
| ; CHECK-NEXT: movi d0, #0000000000000000 |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: tbnz w8, #0, .LBB1_3 |
| ; CHECK-NEXT: // %bb.1: // %else |
| ; CHECK-NEXT: tbnz w8, #1, .LBB1_4 |
| ; CHECK-NEXT: .LBB1_2: // %else2 |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_3: // %cond.load |
| ; CHECK-NEXT: movi d0, #0000000000000000 |
| ; CHECK-NEXT: ld1 { v0.s }[0], [x0], #4 |
| ; CHECK-NEXT: tbz w8, #1, .LBB1_2 |
| ; CHECK-NEXT: .LBB1_4: // %cond.load1 |
| ; CHECK-NEXT: ld1 { v0.s }[1], [x0] |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v2f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ldr d0, [x0] |
| ; CHECK-EXPAND-NEXT: ldr d1, [x1] |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl2 |
| ; CHECK-EXPAND-NEXT: fcmeq v0.2s, v0.2s, v1.2s |
| ; CHECK-EXPAND-NEXT: cmpne p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: // kill: def $d0 killed $d0 killed $z0 |
| ; CHECK-EXPAND-NEXT: ret |
| ; Note that %ap is both the first compare operand and the expand-load source. |
| %a = load <2 x float>, ptr %ap |
| %b = load <2 x float>, ptr %bp |
| %mask = fcmp oeq <2 x float> %a, %b |
| %load = call <2 x float> @llvm.masked.expandload.v2f32(ptr %ap, <2 x i1> %mask, <2 x float> zeroinitializer) |
| ret <2 x float> %load |
| } |
| |
| ; Expand-load of a <4 x float> vector from %ap. The lane mask is the result of |
| ; an ordered-equal compare of the vectors loaded from %ap and %bp, and the |
| ; passthru operand is zeroinitializer. |
| ; Unlike the neighbouring tests this function is declared vscale_range(1,0). |
| ; The CHECK-EXPAND expectations below are the same instruction sequence as the |
| ; CHECK ones (per-lane tbnz/tbz branches and lane-wise ld1) and contain no expand |
| ; instruction. |
| ; NOTE(review) presumably the vscale_range(1,0) attribute is what keeps this |
| ; case on the scalarized path even with sve2p2 enabled - confirm. |
| define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 { |
| ; CHECK-LABEL: masked_load_v4f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ldr q1, [x1] |
| ; CHECK-NEXT: adrp x8, .LCPI2_0 |
| ; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s |
| ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] |
| ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: addv s1, v0.4s |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: tbnz w8, #0, .LBB2_5 |
| ; CHECK-NEXT: // %bb.1: // %else |
| ; CHECK-NEXT: tbnz w8, #1, .LBB2_6 |
| ; CHECK-NEXT: .LBB2_2: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB2_7 |
| ; CHECK-NEXT: .LBB2_3: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB2_8 |
| ; CHECK-NEXT: .LBB2_4: // %else10 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB2_5: // %cond.load |
| ; CHECK-NEXT: ld1 { v0.s }[0], [x0], #4 |
| ; CHECK-NEXT: tbz w8, #1, .LBB2_2 |
| ; CHECK-NEXT: .LBB2_6: // %cond.load1 |
| ; CHECK-NEXT: ld1 { v0.s }[1], [x0], #4 |
| ; CHECK-NEXT: tbz w8, #2, .LBB2_3 |
| ; CHECK-NEXT: .LBB2_7: // %cond.load5 |
| ; CHECK-NEXT: ld1 { v0.s }[2], [x0], #4 |
| ; CHECK-NEXT: tbz w8, #3, .LBB2_4 |
| ; CHECK-NEXT: .LBB2_8: // %cond.load9 |
| ; CHECK-NEXT: ld1 { v0.s }[3], [x0] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v4f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ldr q0, [x0] |
| ; CHECK-EXPAND-NEXT: ldr q1, [x1] |
| ; CHECK-EXPAND-NEXT: adrp x8, .LCPI2_0 |
| ; CHECK-EXPAND-NEXT: fcmeq v0.4s, v0.4s, v1.4s |
| ; CHECK-EXPAND-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] |
| ; CHECK-EXPAND-NEXT: and v0.16b, v0.16b, v1.16b |
| ; CHECK-EXPAND-NEXT: addv s1, v0.4s |
| ; CHECK-EXPAND-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-EXPAND-NEXT: fmov w8, s1 |
| ; CHECK-EXPAND-NEXT: tbnz w8, #0, .LBB2_5 |
| ; CHECK-EXPAND-NEXT: // %bb.1: // %else |
| ; CHECK-EXPAND-NEXT: tbnz w8, #1, .LBB2_6 |
| ; CHECK-EXPAND-NEXT: .LBB2_2: // %else2 |
| ; CHECK-EXPAND-NEXT: tbnz w8, #2, .LBB2_7 |
| ; CHECK-EXPAND-NEXT: .LBB2_3: // %else6 |
| ; CHECK-EXPAND-NEXT: tbnz w8, #3, .LBB2_8 |
| ; CHECK-EXPAND-NEXT: .LBB2_4: // %else10 |
| ; CHECK-EXPAND-NEXT: ret |
| ; CHECK-EXPAND-NEXT: .LBB2_5: // %cond.load |
| ; CHECK-EXPAND-NEXT: ld1 { v0.s }[0], [x0], #4 |
| ; CHECK-EXPAND-NEXT: tbz w8, #1, .LBB2_2 |
| ; CHECK-EXPAND-NEXT: .LBB2_6: // %cond.load1 |
| ; CHECK-EXPAND-NEXT: ld1 { v0.s }[1], [x0], #4 |
| ; CHECK-EXPAND-NEXT: tbz w8, #2, .LBB2_3 |
| ; CHECK-EXPAND-NEXT: .LBB2_7: // %cond.load5 |
| ; CHECK-EXPAND-NEXT: ld1 { v0.s }[2], [x0], #4 |
| ; CHECK-EXPAND-NEXT: tbz w8, #3, .LBB2_4 |
| ; CHECK-EXPAND-NEXT: .LBB2_8: // %cond.load9 |
| ; CHECK-EXPAND-NEXT: ld1 { v0.s }[3], [x0] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Note that %ap is both the first compare operand and the expand-load source. |
| %a = load <4 x float>, ptr %ap |
| %b = load <4 x float>, ptr %bp |
| %mask = fcmp oeq <4 x float> %a, %b |
| %load = call <4 x float> @llvm.masked.expandload.v4f32(ptr %ap, <4 x i1> %mask, <4 x float> zeroinitializer) |
| ret <4 x float> %load |
| } |
| |
| ; Expand-load of an <8 x float> vector from %ap. The lane mask is the result of |
| ; an ordered-equal compare of the vectors loaded from %ap and %bp, the passthru |
| ; operand is zeroinitializer, and the result is stored to %c instead of being |
| ; returned. |
| ; Expected code under the CHECK prefix sets up a stack frame, assembles the mask |
| ; bits in a general-purpose register with umov/bfi, and then handles every lane |
| ; in its own branch. Lane 0 goes through a zeroed stack slot, while lanes 1-7 |
| ; are merged into z0 using a predicate produced by cmpeq against an index vector. |
| ; Under the CHECK-EXPAND prefix (the RUN line with -mattr=sve2p2) the expectation |
| ; has no stack frame and no branches, only cntp, whilelo, a predicated ld1w, |
| ; expand and the final st1w. |
| define void @masked_load_v8f32(ptr %ap, ptr %bp, ptr %c) vscale_range(2,0) #0 { |
| ; CHECK-LABEL: masked_load_v8f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: sub x9, sp, #48 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: ptrue p0.s, vl8 |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w8, v0.b[0] |
| ; CHECK-NEXT: umov w9, v0.b[1] |
| ; CHECK-NEXT: umov w10, v0.b[2] |
| ; CHECK-NEXT: and w8, w8, #0x1 |
| ; CHECK-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-NEXT: umov w9, v0.b[3] |
| ; CHECK-NEXT: bfi w8, w10, #2, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: bfi w8, w9, #3, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: bfi w8, w10, #4, #1 |
| ; CHECK-NEXT: umov w10, v0.b[6] |
| ; CHECK-NEXT: bfi w8, w9, #5, #1 |
| ; CHECK-NEXT: umov w9, v0.b[7] |
| ; CHECK-NEXT: bfi w8, w10, #6, #1 |
| ; CHECK-NEXT: orr w9, w8, w9, lsl #7 |
| ; CHECK-NEXT: and w8, w9, #0xff |
| ; CHECK-NEXT: tbz w9, #0, .LBB3_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr s0, [x0], #4 |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #16] |
| ; CHECK-NEXT: str xzr, [sp, #8] |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: str wzr, [sp, #4] |
| ; CHECK-NEXT: str s0, [sp] |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB3_3 |
| ; CHECK-NEXT: b .LBB3_4 |
| ; CHECK-NEXT: .LBB3_2: |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: tbz w8, #1, .LBB3_4 |
| ; CHECK-NEXT: .LBB3_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB3_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB3_12 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB3_13 |
| ; CHECK-NEXT: .LBB3_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB3_14 |
| ; CHECK-NEXT: .LBB3_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB3_15 |
| ; CHECK-NEXT: .LBB3_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB3_16 |
| ; CHECK-NEXT: .LBB3_9: // %else22 |
| ; CHECK-NEXT: tbz w8, #7, .LBB3_11 |
| ; CHECK-NEXT: .LBB3_10: // %cond.load25 |
| ; CHECK-NEXT: mov w8, #7 // =0x7 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w8 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0] |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB3_11: // %else26 |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB3_12: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #3, .LBB3_6 |
| ; CHECK-NEXT: .LBB3_13: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #4, .LBB3_7 |
| ; CHECK-NEXT: .LBB3_14: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #5, .LBB3_8 |
| ; CHECK-NEXT: .LBB3_15: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #6, .LBB3_9 |
| ; CHECK-NEXT: .LBB3_16: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB3_10 |
| ; CHECK-NEXT: b .LBB3_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v8f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Note that %ap is both the first compare operand and the expand-load source. |
| %a = load <8 x float>, ptr %ap |
| %b = load <8 x float>, ptr %bp |
| %mask = fcmp oeq <8 x float> %a, %b |
| %load = call <8 x float> @llvm.masked.expandload.v8f32(ptr %ap, <8 x i1> %mask, <8 x float> zeroinitializer) |
| store <8 x float> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_v16f32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_v16f32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: sub x9, sp, #48 |
| ; VBITS_GE_256-NEXT: mov x29, sp |
| ; VBITS_GE_256-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa w29, 16 |
| ; VBITS_GE_256-NEXT: .cfi_offset w30, -8 |
| ; VBITS_GE_256-NEXT: .cfi_offset w29, -16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI4_0 |
| ; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w9, s0 |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB4_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldr s0, [x0], #4 |
| ; VBITS_GE_256-NEXT: stp xzr, xzr, [sp, #16] |
| ; VBITS_GE_256-NEXT: str xzr, [sp, #8] |
| ; VBITS_GE_256-NEXT: mov x9, sp |
| ; VBITS_GE_256-NEXT: movi v1.2d, #0000000000000000 |
| ; VBITS_GE_256-NEXT: str wzr, [sp, #4] |
| ; VBITS_GE_256-NEXT: str s0, [sp] |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB4_3 |
| ; VBITS_GE_256-NEXT: b .LBB4_4 |
| ; VBITS_GE_256-NEXT: .LBB4_2: |
| ; VBITS_GE_256-NEXT: movi v0.2d, #0000000000000000 |
| ; VBITS_GE_256-NEXT: movi v1.2d, #0000000000000000 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB4_4 |
| ; VBITS_GE_256-NEXT: .LBB4_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: .LBB4_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB4_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB4_21 |
| ; VBITS_GE_256-NEXT: .LBB4_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB4_22 |
| ; VBITS_GE_256-NEXT: .LBB4_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB4_23 |
| ; VBITS_GE_256-NEXT: .LBB4_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB4_24 |
| ; VBITS_GE_256-NEXT: .LBB4_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB4_25 |
| ; VBITS_GE_256-NEXT: .LBB4_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB4_26 |
| ; VBITS_GE_256-NEXT: .LBB4_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB4_27 |
| ; VBITS_GE_256-NEXT: .LBB4_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB4_28 |
| ; VBITS_GE_256-NEXT: .LBB4_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB4_29 |
| ; VBITS_GE_256-NEXT: .LBB4_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB4_30 |
| ; VBITS_GE_256-NEXT: .LBB4_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB4_31 |
| ; VBITS_GE_256-NEXT: .LBB4_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB4_32 |
| ; VBITS_GE_256-NEXT: .LBB4_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB4_19 |
| ; VBITS_GE_256-NEXT: .LBB4_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: .LBB4_19: // %else58 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: mov sp, x29 |
| ; VBITS_GE_256-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB4_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB4_6 |
| ; VBITS_GE_256-NEXT: .LBB4_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB4_7 |
| ; VBITS_GE_256-NEXT: .LBB4_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB4_8 |
| ; VBITS_GE_256-NEXT: .LBB4_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB4_9 |
| ; VBITS_GE_256-NEXT: .LBB4_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB4_10 |
| ; VBITS_GE_256-NEXT: .LBB4_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB4_11 |
| ; VBITS_GE_256-NEXT: .LBB4_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.s, p1/m, z2.s |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB4_12 |
| ; VBITS_GE_256-NEXT: .LBB4_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB4_13 |
| ; VBITS_GE_256-NEXT: .LBB4_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB4_14 |
| ; VBITS_GE_256-NEXT: .LBB4_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB4_15 |
| ; VBITS_GE_256-NEXT: .LBB4_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB4_16 |
| ; VBITS_GE_256-NEXT: .LBB4_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB4_17 |
| ; VBITS_GE_256-NEXT: .LBB4_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: ldr s2, [x0], #4 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB4_18 |
| ; VBITS_GE_256-NEXT: b .LBB4_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_v16f32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: sub x9, sp, #112 |
| ; VBITS_GE_512-NEXT: mov x29, sp |
| ; VBITS_GE_512-NEXT: and sp, x9, #0xffffffffffffffc0 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB4_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldr s0, [x0], #4 |
| ; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #48] |
| ; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #32] |
| ; VBITS_GE_512-NEXT: mov x9, sp |
| ; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #16] |
| ; VBITS_GE_512-NEXT: str xzr, [sp, #8] |
| ; VBITS_GE_512-NEXT: str wzr, [sp, #4] |
| ; VBITS_GE_512-NEXT: str s0, [sp] |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB4_3 |
| ; VBITS_GE_512-NEXT: b .LBB4_4 |
| ; VBITS_GE_512-NEXT: .LBB4_2: |
| ; VBITS_GE_512-NEXT: movi v0.2d, #0000000000000000 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB4_4 |
| ; VBITS_GE_512-NEXT: .LBB4_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: .LBB4_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB4_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB4_21 |
| ; VBITS_GE_512-NEXT: .LBB4_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB4_22 |
| ; VBITS_GE_512-NEXT: .LBB4_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB4_23 |
| ; VBITS_GE_512-NEXT: .LBB4_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB4_24 |
| ; VBITS_GE_512-NEXT: .LBB4_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB4_25 |
| ; VBITS_GE_512-NEXT: .LBB4_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB4_26 |
| ; VBITS_GE_512-NEXT: .LBB4_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB4_27 |
| ; VBITS_GE_512-NEXT: .LBB4_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB4_28 |
| ; VBITS_GE_512-NEXT: .LBB4_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB4_29 |
| ; VBITS_GE_512-NEXT: .LBB4_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB4_30 |
| ; VBITS_GE_512-NEXT: .LBB4_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB4_31 |
| ; VBITS_GE_512-NEXT: .LBB4_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB4_32 |
| ; VBITS_GE_512-NEXT: .LBB4_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB4_19 |
| ; VBITS_GE_512-NEXT: .LBB4_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0] |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: .LBB4_19: // %else58 |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: mov sp, x29 |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB4_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB4_6 |
| ; VBITS_GE_512-NEXT: .LBB4_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB4_7 |
| ; VBITS_GE_512-NEXT: .LBB4_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB4_8 |
| ; VBITS_GE_512-NEXT: .LBB4_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB4_9 |
| ; VBITS_GE_512-NEXT: .LBB4_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB4_10 |
| ; VBITS_GE_512-NEXT: .LBB4_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB4_11 |
| ; VBITS_GE_512-NEXT: .LBB4_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB4_12 |
| ; VBITS_GE_512-NEXT: .LBB4_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB4_13 |
| ; VBITS_GE_512-NEXT: .LBB4_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB4_14 |
| ; VBITS_GE_512-NEXT: .LBB4_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB4_15 |
| ; VBITS_GE_512-NEXT: .LBB4_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB4_16 |
| ; VBITS_GE_512-NEXT: .LBB4_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB4_17 |
| ; VBITS_GE_512-NEXT: .LBB4_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: ldr s1, [x0], #4 |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB4_18 |
| ; VBITS_GE_512-NEXT: b .LBB4_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v16f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: umov w8, v0.b[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[1] |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[2] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.b[3] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[4] |
| ; CHECK-EXPAND-NEXT: bfi w8, w10, #2, #1 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[5] |
| ; CHECK-EXPAND-NEXT: bfi w8, w11, #3, #1 |
| ; CHECK-EXPAND-NEXT: mov x11, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #4, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[6] |
| ; CHECK-EXPAND-NEXT: bfi w8, w10, #5, #1 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[7] |
| ; CHECK-EXPAND-NEXT: fcmeq p2.s, p0/z, z2.s, z1.s |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #6, #1 |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #7 |
| ; CHECK-EXPAND-NEXT: cntp x10, p1, p1.s |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xff |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: whilelo p4.s, xzr, x10 |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.s, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: expand z1.s, p1, z1.s |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p3/z, [x0, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <16 x float>, ptr %ap |
| %b = load <16 x float>, ptr %bp |
| %mask = fcmp oeq <16 x float> %a, %b |
| %load = call <16 x float> @llvm.masked.expandload.v16f32(ptr %ap, <16 x i1> %mask, <16 x float> zeroinitializer) |
| store <16 x float> %load, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <32 x float> from %ap, with the result stored to %c. The lane mask |
| ; is the ordered-equal comparison of the vectors loaded from %ap and %bp; inactive |
| ; lanes take the zeroinitializer passthru. |
| ; vscale_range(8,0) sets the minimum vscale to 8 (presumably >= 1024-bit SVE registers |
| ; -- confirm against the LangRef), which lines up with the single `ptrue p0.s, vl32` |
| ; predicate that governs the vector loads and the final store in both expected outputs. |
| ; Expected output without sve2p2: the mask is packed into w8 and every lane is handled |
| ; by a tbz/tbnz-guarded scalar `ldr s1, [x0], #4` that is merged into z0. |
| ; Expected output with sve2p2: cntp + whilelo + one contiguous ld1w + expand. |
| ; The assertions in this function are autogenerated (see the NOTE on the first line of |
| ; the file): regenerate them with utils/update_llc_test_checks.py, do not hand-edit. |
| define void @masked_load_v32f32(ptr %ap, ptr %bp, ptr %c) vscale_range(8,0) #0 { |
| ; CHECK-LABEL: masked_load_v32f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEXT: sub x9, sp, #288 |
| ; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffff80 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 96 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p0.s, vl32 |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[1] |
| ; CHECK-NEXT: umov w3, v0.b[7] |
| ; CHECK-NEXT: umov w4, v0.b[8] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w5, v0.b[9] |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: umov w18, v0.b[10] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w1, v0.b[11] |
| ; CHECK-NEXT: bfi w6, w13, #1, #1 |
| ; CHECK-NEXT: ubfiz w13, w3, #7, #1 |
| ; CHECK-NEXT: ubfiz w3, w4, #8, #1 |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: ubfiz w4, w5, #9, #1 |
| ; CHECK-NEXT: ubfiz w5, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w20, w21, #19, #1 |
| ; CHECK-NEXT: umov w16, v0.b[12] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: bfi w6, w12, #2, #1 |
| ; CHECK-NEXT: orr w12, w13, w3 |
| ; CHECK-NEXT: ubfiz w13, w22, #20, #1 |
| ; CHECK-NEXT: umov w17, v0.b[13] |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: orr w3, w5, w20 |
| ; CHECK-NEXT: ubfiz w18, w18, #10, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: orr w12, w12, w4 |
| ; CHECK-NEXT: orr w13, w3, w13 |
| ; CHECK-NEXT: ubfiz w3, w23, #21, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: ubfiz w1, w1, #11, #1 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: orr w11, w12, w18 |
| ; CHECK-NEXT: ubfiz w12, w24, #22, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: orr w13, w13, w3 |
| ; CHECK-NEXT: ubfiz w16, w16, #12, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: fmov w28, s21 |
| ; CHECK-NEXT: orr w11, w11, w1 |
| ; CHECK-NEXT: orr w12, w13, w12 |
| ; CHECK-NEXT: ubfiz w13, w17, #13, #1 |
| ; CHECK-NEXT: ubfiz w17, w25, #23, #1 |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: orr w10, w11, w16 |
| ; CHECK-NEXT: ubfiz w11, w26, #24, #1 |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: orr w12, w12, w17 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: ubfiz w13, w14, #14, #1 |
| ; CHECK-NEXT: ubfiz w14, w27, #25, #1 |
| ; CHECK-NEXT: orr w11, w12, w11 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: ubfiz w9, w28, #26, #1 |
| ; CHECK-NEXT: str w8, [sp, #124] // 4-byte Spill |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w30, s22 |
| ; CHECK-NEXT: orr w11, w11, w14 |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: orr w9, w11, w9 |
| ; CHECK-NEXT: ldr w11, [sp, #124] // 4-byte Reload |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: fmov w13, s24 |
| ; CHECK-NEXT: fmov w8, s23 |
| ; CHECK-NEXT: bfi w6, w11, #6, #1 |
| ; CHECK-NEXT: ubfiz w11, w30, #27, #1 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: ubfiz w12, w15, #15, #1 |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w9, w9, w11 |
| ; CHECK-NEXT: ubfiz w11, w13, #29, #1 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: ubfiz w8, w8, #28, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w14, w19, #17, #1 |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: ubfiz w9, w13, #30, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w10, w10, w14 |
| ; CHECK-NEXT: orr w8, w8, w9 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w10, w6, w10 |
| ; CHECK-NEXT: orr w8, w10, w8 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB5_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr s0, [x0], #4 |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #240] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #224] |
| ; CHECK-NEXT: add x9, sp, #128 |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #208] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #192] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #176] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #160] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #144] |
| ; CHECK-NEXT: str xzr, [sp, #136] |
| ; CHECK-NEXT: str wzr, [sp, #132] |
| ; CHECK-NEXT: str s0, [sp, #128] |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB5_3 |
| ; CHECK-NEXT: b .LBB5_4 |
| ; CHECK-NEXT: .LBB5_2: |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: tbz w8, #1, .LBB5_4 |
| ; CHECK-NEXT: .LBB5_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB5_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB5_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB5_37 |
| ; CHECK-NEXT: .LBB5_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB5_38 |
| ; CHECK-NEXT: .LBB5_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB5_39 |
| ; CHECK-NEXT: .LBB5_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB5_40 |
| ; CHECK-NEXT: .LBB5_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB5_41 |
| ; CHECK-NEXT: .LBB5_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB5_42 |
| ; CHECK-NEXT: .LBB5_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB5_43 |
| ; CHECK-NEXT: .LBB5_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB5_44 |
| ; CHECK-NEXT: .LBB5_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB5_45 |
| ; CHECK-NEXT: .LBB5_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB5_46 |
| ; CHECK-NEXT: .LBB5_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB5_47 |
| ; CHECK-NEXT: .LBB5_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB5_48 |
| ; CHECK-NEXT: .LBB5_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB5_49 |
| ; CHECK-NEXT: .LBB5_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB5_50 |
| ; CHECK-NEXT: .LBB5_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB5_51 |
| ; CHECK-NEXT: .LBB5_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB5_52 |
| ; CHECK-NEXT: .LBB5_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB5_53 |
| ; CHECK-NEXT: .LBB5_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB5_54 |
| ; CHECK-NEXT: .LBB5_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB5_55 |
| ; CHECK-NEXT: .LBB5_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB5_56 |
| ; CHECK-NEXT: .LBB5_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB5_57 |
| ; CHECK-NEXT: .LBB5_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB5_58 |
| ; CHECK-NEXT: .LBB5_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB5_59 |
| ; CHECK-NEXT: .LBB5_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB5_60 |
| ; CHECK-NEXT: .LBB5_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB5_61 |
| ; CHECK-NEXT: .LBB5_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB5_62 |
| ; CHECK-NEXT: .LBB5_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB5_63 |
| ; CHECK-NEXT: .LBB5_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB5_64 |
| ; CHECK-NEXT: .LBB5_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB5_35 |
| ; CHECK-NEXT: .LBB5_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w8 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0] |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB5_35: // %else122 |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB5_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #3, .LBB5_6 |
| ; CHECK-NEXT: .LBB5_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #4, .LBB5_7 |
| ; CHECK-NEXT: .LBB5_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #5, .LBB5_8 |
| ; CHECK-NEXT: .LBB5_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #6, .LBB5_9 |
| ; CHECK-NEXT: .LBB5_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #7, .LBB5_10 |
| ; CHECK-NEXT: .LBB5_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #8, .LBB5_11 |
| ; CHECK-NEXT: .LBB5_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #9, .LBB5_12 |
| ; CHECK-NEXT: .LBB5_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #10, .LBB5_13 |
| ; CHECK-NEXT: .LBB5_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #11, .LBB5_14 |
| ; CHECK-NEXT: .LBB5_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #12, .LBB5_15 |
| ; CHECK-NEXT: .LBB5_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #13, .LBB5_16 |
| ; CHECK-NEXT: .LBB5_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #14, .LBB5_17 |
| ; CHECK-NEXT: .LBB5_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #15, .LBB5_18 |
| ; CHECK-NEXT: .LBB5_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #16, .LBB5_19 |
| ; CHECK-NEXT: .LBB5_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #17, .LBB5_20 |
| ; CHECK-NEXT: .LBB5_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #18, .LBB5_21 |
| ; CHECK-NEXT: .LBB5_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #19, .LBB5_22 |
| ; CHECK-NEXT: .LBB5_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #20, .LBB5_23 |
| ; CHECK-NEXT: .LBB5_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #21, .LBB5_24 |
| ; CHECK-NEXT: .LBB5_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #22, .LBB5_25 |
| ; CHECK-NEXT: .LBB5_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #23, .LBB5_26 |
| ; CHECK-NEXT: .LBB5_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #24, .LBB5_27 |
| ; CHECK-NEXT: .LBB5_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #25, .LBB5_28 |
| ; CHECK-NEXT: .LBB5_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #26, .LBB5_29 |
| ; CHECK-NEXT: .LBB5_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #27, .LBB5_30 |
| ; CHECK-NEXT: .LBB5_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #28, .LBB5_31 |
| ; CHECK-NEXT: .LBB5_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #29, .LBB5_32 |
| ; CHECK-NEXT: .LBB5_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #30, .LBB5_33 |
| ; CHECK-NEXT: .LBB5_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB5_34 |
| ; CHECK-NEXT: b .LBB5_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v32f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl32 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; %ap is read twice: once here as a comparison operand, and again below as the base |
| ; address of the expanding load. |
| %a = load <32 x float>, ptr %ap |
| %b = load <32 x float>, ptr %bp |
| ; Lane i is active when a[i] and b[i] compare ordered-and-equal. |
| %mask = fcmp oeq <32 x float> %a, %b |
| ; Active lanes are filled, in lane order, from consecutive floats starting at %ap; |
| ; inactive lanes take the zeroinitializer passthru. |
| %load = call <32 x float> @llvm.masked.expandload.v32f32(ptr %ap, <32 x i1> %mask, <32 x float> zeroinitializer) |
| store <32 x float> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_v64f32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_v64f32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEXT: sub x9, sp, #672 |
| ; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffff00 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 96 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p0.s, vl64 |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w12, v0.b[1] |
| ; CHECK-NEXT: mov z3.b, z0.b[18] |
| ; CHECK-NEXT: mov z4.b, z0.b[19] |
| ; CHECK-NEXT: fmov w28, s0 |
| ; CHECK-NEXT: umov w17, v0.b[7] |
| ; CHECK-NEXT: umov w4, v0.b[8] |
| ; CHECK-NEXT: mov z5.b, z0.b[20] |
| ; CHECK-NEXT: umov w13, v0.b[2] |
| ; CHECK-NEXT: umov w3, v0.b[9] |
| ; CHECK-NEXT: mov z1.b, z0.b[16] |
| ; CHECK-NEXT: mov z6.b, z0.b[21] |
| ; CHECK-NEXT: fmov w19, s3 |
| ; CHECK-NEXT: fmov w20, s4 |
| ; CHECK-NEXT: and x28, x28, #0x1 |
| ; CHECK-NEXT: umov w18, v0.b[10] |
| ; CHECK-NEXT: mov z2.b, z0.b[17] |
| ; CHECK-NEXT: mov z7.b, z0.b[22] |
| ; CHECK-NEXT: fmov w21, s5 |
| ; CHECK-NEXT: bfi x28, x12, #1, #1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w1, v0.b[11] |
| ; CHECK-NEXT: fmov w6, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[23] |
| ; CHECK-NEXT: fmov w22, s6 |
| ; CHECK-NEXT: ubfiz x12, x17, #7, #1 |
| ; CHECK-NEXT: ubfiz x17, x4, #8, #1 |
| ; CHECK-NEXT: ubfiz x4, x19, #18, #1 |
| ; CHECK-NEXT: ubfiz x19, x20, #19, #1 |
| ; CHECK-NEXT: bfi x28, x13, #2, #1 |
| ; CHECK-NEXT: ubfiz x13, x3, #9, #1 |
| ; CHECK-NEXT: umov w15, v0.b[12] |
| ; CHECK-NEXT: fmov w7, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s7 |
| ; CHECK-NEXT: orr x12, x12, x17 |
| ; CHECK-NEXT: ubfiz x3, x21, #20, #1 |
| ; CHECK-NEXT: fmov w24, s1 |
| ; CHECK-NEXT: orr x17, x4, x19 |
| ; CHECK-NEXT: orr x12, x12, x13 |
| ; CHECK-NEXT: ubfiz x13, x18, #10, #1 |
| ; CHECK-NEXT: ubfiz x18, x22, #21, #1 |
| ; CHECK-NEXT: orr x17, x17, x3 |
| ; CHECK-NEXT: bfi x28, x11, #3, #1 |
| ; CHECK-NEXT: ubfiz x11, x1, #11, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: umov w8, v0.b[5] |
| ; CHECK-NEXT: umov w16, v0.b[13] |
| ; CHECK-NEXT: fmov w25, s2 |
| ; CHECK-NEXT: orr x12, x12, x13 |
| ; CHECK-NEXT: orr x13, x17, x18 |
| ; CHECK-NEXT: ubfiz x17, x23, #22, #1 |
| ; CHECK-NEXT: orr x11, x12, x11 |
| ; CHECK-NEXT: ubfiz x12, x15, #12, #1 |
| ; CHECK-NEXT: ubfiz x15, x24, #23, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: mov z16.b, z0.b[25] |
| ; CHECK-NEXT: orr x13, x13, x17 |
| ; CHECK-NEXT: umov w5, v0.b[15] |
| ; CHECK-NEXT: orr x11, x11, x12 |
| ; CHECK-NEXT: orr x12, x13, x15 |
| ; CHECK-NEXT: ubfiz x13, x25, #24, #1 |
| ; CHECK-NEXT: str x8, [sp, #240] // 8-byte Spill |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: mov z4.b, z0.b[26] |
| ; CHECK-NEXT: bfi x28, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x10, x16, #13, #1 |
| ; CHECK-NEXT: mov z5.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s16 |
| ; CHECK-NEXT: orr x12, x12, x13 |
| ; CHECK-NEXT: ldr x13, [sp, #240] // 8-byte Reload |
| ; CHECK-NEXT: mov z6.b, z0.b[28] |
| ; CHECK-NEXT: orr x10, x11, x10 |
| ; CHECK-NEXT: ubfiz x11, x14, #14, #1 |
| ; CHECK-NEXT: mov z7.b, z0.b[29] |
| ; CHECK-NEXT: fmov w27, s4 |
| ; CHECK-NEXT: bfi x28, x13, #5, #1 |
| ; CHECK-NEXT: ubfiz x13, x5, #15, #1 |
| ; CHECK-NEXT: str x8, [sp, #248] // 8-byte Spill |
| ; CHECK-NEXT: mov z17.b, z0.b[30] |
| ; CHECK-NEXT: fmov w30, s5 |
| ; CHECK-NEXT: ubfiz x14, x26, #25, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: fmov w8, s6 |
| ; CHECK-NEXT: fmov w9, s7 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: ldr x13, [sp, #248] // 8-byte Reload |
| ; CHECK-NEXT: orr x11, x12, x14 |
| ; CHECK-NEXT: ubfiz x12, x27, #26, #1 |
| ; CHECK-NEXT: fmov w20, s17 |
| ; CHECK-NEXT: mov z3.b, z0.b[31] |
| ; CHECK-NEXT: bfi x28, x13, #6, #1 |
| ; CHECK-NEXT: ubfiz x13, x30, #27, #1 |
| ; CHECK-NEXT: ubfiz x8, x8, #28, #1 |
| ; CHECK-NEXT: orr x11, x11, x12 |
| ; CHECK-NEXT: ubfiz x12, x6, #16, #1 |
| ; CHECK-NEXT: ubfiz x9, x9, #29, #1 |
| ; CHECK-NEXT: orr x11, x11, x13 |
| ; CHECK-NEXT: ubfiz x13, x7, #17, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[32] |
| ; CHECK-NEXT: orr x10, x10, x12 |
| ; CHECK-NEXT: orr x8, x11, x8 |
| ; CHECK-NEXT: ubfiz x11, x20, #30, #1 |
| ; CHECK-NEXT: fmov w12, s3 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: orr x8, x8, x9 |
| ; CHECK-NEXT: orr x9, x28, x10 |
| ; CHECK-NEXT: orr x8, x8, x11 |
| ; CHECK-NEXT: mov z1.b, z0.b[33] |
| ; CHECK-NEXT: orr x8, x9, x8 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[34] |
| ; CHECK-NEXT: lsl w10, w12, #31 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[62] |
| ; CHECK-NEXT: mov z0.b, z0.b[63] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB6_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ldr s0, [x0], #4 |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #496] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #480] |
| ; CHECK-NEXT: add x9, sp, #256 |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #464] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #448] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #432] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #416] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #400] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #384] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #368] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #352] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #336] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #320] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #304] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #288] |
| ; CHECK-NEXT: stp xzr, xzr, [sp, #272] |
| ; CHECK-NEXT: str xzr, [sp, #264] |
| ; CHECK-NEXT: str wzr, [sp, #260] |
| ; CHECK-NEXT: str s0, [sp, #256] |
| ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; CHECK-NEXT: tbnz w8, #1, .LBB6_3 |
| ; CHECK-NEXT: b .LBB6_4 |
| ; CHECK-NEXT: .LBB6_2: |
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| ; CHECK-NEXT: tbz w8, #1, .LBB6_4 |
| ; CHECK-NEXT: .LBB6_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB6_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB6_68 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB6_69 |
| ; CHECK-NEXT: .LBB6_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB6_70 |
| ; CHECK-NEXT: .LBB6_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB6_71 |
| ; CHECK-NEXT: .LBB6_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB6_72 |
| ; CHECK-NEXT: .LBB6_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB6_73 |
| ; CHECK-NEXT: .LBB6_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB6_74 |
| ; CHECK-NEXT: .LBB6_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB6_75 |
| ; CHECK-NEXT: .LBB6_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB6_76 |
| ; CHECK-NEXT: .LBB6_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB6_77 |
| ; CHECK-NEXT: .LBB6_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB6_78 |
| ; CHECK-NEXT: .LBB6_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB6_79 |
| ; CHECK-NEXT: .LBB6_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB6_80 |
| ; CHECK-NEXT: .LBB6_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB6_81 |
| ; CHECK-NEXT: .LBB6_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB6_82 |
| ; CHECK-NEXT: .LBB6_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB6_83 |
| ; CHECK-NEXT: .LBB6_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB6_84 |
| ; CHECK-NEXT: .LBB6_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB6_85 |
| ; CHECK-NEXT: .LBB6_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB6_86 |
| ; CHECK-NEXT: .LBB6_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB6_87 |
| ; CHECK-NEXT: .LBB6_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB6_88 |
| ; CHECK-NEXT: .LBB6_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB6_89 |
| ; CHECK-NEXT: .LBB6_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB6_90 |
| ; CHECK-NEXT: .LBB6_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB6_91 |
| ; CHECK-NEXT: .LBB6_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB6_92 |
| ; CHECK-NEXT: .LBB6_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB6_93 |
| ; CHECK-NEXT: .LBB6_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB6_94 |
| ; CHECK-NEXT: .LBB6_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB6_95 |
| ; CHECK-NEXT: .LBB6_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB6_96 |
| ; CHECK-NEXT: .LBB6_33: // %else118 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB6_97 |
| ; CHECK-NEXT: .LBB6_34: // %else122 |
| ; CHECK-NEXT: tbnz x8, #32, .LBB6_98 |
| ; CHECK-NEXT: .LBB6_35: // %else126 |
| ; CHECK-NEXT: tbnz x8, #33, .LBB6_99 |
| ; CHECK-NEXT: .LBB6_36: // %else130 |
| ; CHECK-NEXT: tbnz x8, #34, .LBB6_100 |
| ; CHECK-NEXT: .LBB6_37: // %else134 |
| ; CHECK-NEXT: tbnz x8, #35, .LBB6_101 |
| ; CHECK-NEXT: .LBB6_38: // %else138 |
| ; CHECK-NEXT: tbnz x8, #36, .LBB6_102 |
| ; CHECK-NEXT: .LBB6_39: // %else142 |
| ; CHECK-NEXT: tbnz x8, #37, .LBB6_103 |
| ; CHECK-NEXT: .LBB6_40: // %else146 |
| ; CHECK-NEXT: tbnz x8, #38, .LBB6_104 |
| ; CHECK-NEXT: .LBB6_41: // %else150 |
| ; CHECK-NEXT: tbnz x8, #39, .LBB6_105 |
| ; CHECK-NEXT: .LBB6_42: // %else154 |
| ; CHECK-NEXT: tbnz x8, #40, .LBB6_106 |
| ; CHECK-NEXT: .LBB6_43: // %else158 |
| ; CHECK-NEXT: tbnz x8, #41, .LBB6_107 |
| ; CHECK-NEXT: .LBB6_44: // %else162 |
| ; CHECK-NEXT: tbnz x8, #42, .LBB6_108 |
| ; CHECK-NEXT: .LBB6_45: // %else166 |
| ; CHECK-NEXT: tbnz x8, #43, .LBB6_109 |
| ; CHECK-NEXT: .LBB6_46: // %else170 |
| ; CHECK-NEXT: tbnz x8, #44, .LBB6_110 |
| ; CHECK-NEXT: .LBB6_47: // %else174 |
| ; CHECK-NEXT: tbnz x8, #45, .LBB6_111 |
| ; CHECK-NEXT: .LBB6_48: // %else178 |
| ; CHECK-NEXT: tbnz x8, #46, .LBB6_112 |
| ; CHECK-NEXT: .LBB6_49: // %else182 |
| ; CHECK-NEXT: tbnz x8, #47, .LBB6_113 |
| ; CHECK-NEXT: .LBB6_50: // %else186 |
| ; CHECK-NEXT: tbnz x8, #48, .LBB6_114 |
| ; CHECK-NEXT: .LBB6_51: // %else190 |
| ; CHECK-NEXT: tbnz x8, #49, .LBB6_115 |
| ; CHECK-NEXT: .LBB6_52: // %else194 |
| ; CHECK-NEXT: tbnz x8, #50, .LBB6_116 |
| ; CHECK-NEXT: .LBB6_53: // %else198 |
| ; CHECK-NEXT: tbnz x8, #51, .LBB6_117 |
| ; CHECK-NEXT: .LBB6_54: // %else202 |
| ; CHECK-NEXT: tbnz x8, #52, .LBB6_118 |
| ; CHECK-NEXT: .LBB6_55: // %else206 |
| ; CHECK-NEXT: tbnz x8, #53, .LBB6_119 |
| ; CHECK-NEXT: .LBB6_56: // %else210 |
| ; CHECK-NEXT: tbnz x8, #54, .LBB6_120 |
| ; CHECK-NEXT: .LBB6_57: // %else214 |
| ; CHECK-NEXT: tbnz x8, #55, .LBB6_121 |
| ; CHECK-NEXT: .LBB6_58: // %else218 |
| ; CHECK-NEXT: tbnz x8, #56, .LBB6_122 |
| ; CHECK-NEXT: .LBB6_59: // %else222 |
| ; CHECK-NEXT: tbnz x8, #57, .LBB6_123 |
| ; CHECK-NEXT: .LBB6_60: // %else226 |
| ; CHECK-NEXT: tbnz x8, #58, .LBB6_124 |
| ; CHECK-NEXT: .LBB6_61: // %else230 |
| ; CHECK-NEXT: tbnz x8, #59, .LBB6_125 |
| ; CHECK-NEXT: .LBB6_62: // %else234 |
| ; CHECK-NEXT: tbnz x8, #60, .LBB6_126 |
| ; CHECK-NEXT: .LBB6_63: // %else238 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB6_127 |
| ; CHECK-NEXT: .LBB6_64: // %else242 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB6_128 |
| ; CHECK-NEXT: .LBB6_65: // %else246 |
| ; CHECK-NEXT: tbz x8, #63, .LBB6_67 |
| ; CHECK-NEXT: .LBB6_66: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w8 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0] |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: .LBB6_67: // %else250 |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB6_68: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #3, .LBB6_6 |
| ; CHECK-NEXT: .LBB6_69: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #4, .LBB6_7 |
| ; CHECK-NEXT: .LBB6_70: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #5, .LBB6_8 |
| ; CHECK-NEXT: .LBB6_71: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #6, .LBB6_9 |
| ; CHECK-NEXT: .LBB6_72: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #7, .LBB6_10 |
| ; CHECK-NEXT: .LBB6_73: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #8, .LBB6_11 |
| ; CHECK-NEXT: .LBB6_74: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #9, .LBB6_12 |
| ; CHECK-NEXT: .LBB6_75: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #10, .LBB6_13 |
| ; CHECK-NEXT: .LBB6_76: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #11, .LBB6_14 |
| ; CHECK-NEXT: .LBB6_77: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #12, .LBB6_15 |
| ; CHECK-NEXT: .LBB6_78: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #13, .LBB6_16 |
| ; CHECK-NEXT: .LBB6_79: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #14, .LBB6_17 |
| ; CHECK-NEXT: .LBB6_80: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #15, .LBB6_18 |
| ; CHECK-NEXT: .LBB6_81: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #16, .LBB6_19 |
| ; CHECK-NEXT: .LBB6_82: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #17, .LBB6_20 |
| ; CHECK-NEXT: .LBB6_83: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #18, .LBB6_21 |
| ; CHECK-NEXT: .LBB6_84: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #19, .LBB6_22 |
| ; CHECK-NEXT: .LBB6_85: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #20, .LBB6_23 |
| ; CHECK-NEXT: .LBB6_86: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #21, .LBB6_24 |
| ; CHECK-NEXT: .LBB6_87: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #22, .LBB6_25 |
| ; CHECK-NEXT: .LBB6_88: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #23, .LBB6_26 |
| ; CHECK-NEXT: .LBB6_89: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #24, .LBB6_27 |
| ; CHECK-NEXT: .LBB6_90: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #25, .LBB6_28 |
| ; CHECK-NEXT: .LBB6_91: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #26, .LBB6_29 |
| ; CHECK-NEXT: .LBB6_92: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #27, .LBB6_30 |
| ; CHECK-NEXT: .LBB6_93: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #28, .LBB6_31 |
| ; CHECK-NEXT: .LBB6_94: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #29, .LBB6_32 |
| ; CHECK-NEXT: .LBB6_95: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #30, .LBB6_33 |
| ; CHECK-NEXT: .LBB6_96: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz w8, #31, .LBB6_34 |
| ; CHECK-NEXT: .LBB6_97: // %cond.load121 |
| ; CHECK-NEXT: mov w9, #31 // =0x1f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #32, .LBB6_35 |
| ; CHECK-NEXT: .LBB6_98: // %cond.load125 |
| ; CHECK-NEXT: mov w9, #32 // =0x20 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #33, .LBB6_36 |
| ; CHECK-NEXT: .LBB6_99: // %cond.load129 |
| ; CHECK-NEXT: mov w9, #33 // =0x21 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #34, .LBB6_37 |
| ; CHECK-NEXT: .LBB6_100: // %cond.load133 |
| ; CHECK-NEXT: mov w9, #34 // =0x22 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #35, .LBB6_38 |
| ; CHECK-NEXT: .LBB6_101: // %cond.load137 |
| ; CHECK-NEXT: mov w9, #35 // =0x23 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #36, .LBB6_39 |
| ; CHECK-NEXT: .LBB6_102: // %cond.load141 |
| ; CHECK-NEXT: mov w9, #36 // =0x24 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #37, .LBB6_40 |
| ; CHECK-NEXT: .LBB6_103: // %cond.load145 |
| ; CHECK-NEXT: mov w9, #37 // =0x25 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #38, .LBB6_41 |
| ; CHECK-NEXT: .LBB6_104: // %cond.load149 |
| ; CHECK-NEXT: mov w9, #38 // =0x26 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #39, .LBB6_42 |
| ; CHECK-NEXT: .LBB6_105: // %cond.load153 |
| ; CHECK-NEXT: mov w9, #39 // =0x27 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #40, .LBB6_43 |
| ; CHECK-NEXT: .LBB6_106: // %cond.load157 |
| ; CHECK-NEXT: mov w9, #40 // =0x28 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #41, .LBB6_44 |
| ; CHECK-NEXT: .LBB6_107: // %cond.load161 |
| ; CHECK-NEXT: mov w9, #41 // =0x29 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #42, .LBB6_45 |
| ; CHECK-NEXT: .LBB6_108: // %cond.load165 |
| ; CHECK-NEXT: mov w9, #42 // =0x2a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #43, .LBB6_46 |
| ; CHECK-NEXT: .LBB6_109: // %cond.load169 |
| ; CHECK-NEXT: mov w9, #43 // =0x2b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #44, .LBB6_47 |
| ; CHECK-NEXT: .LBB6_110: // %cond.load173 |
| ; CHECK-NEXT: mov w9, #44 // =0x2c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #45, .LBB6_48 |
| ; CHECK-NEXT: .LBB6_111: // %cond.load177 |
| ; CHECK-NEXT: mov w9, #45 // =0x2d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #46, .LBB6_49 |
| ; CHECK-NEXT: .LBB6_112: // %cond.load181 |
| ; CHECK-NEXT: mov w9, #46 // =0x2e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #47, .LBB6_50 |
| ; CHECK-NEXT: .LBB6_113: // %cond.load185 |
| ; CHECK-NEXT: mov w9, #47 // =0x2f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #48, .LBB6_51 |
| ; CHECK-NEXT: .LBB6_114: // %cond.load189 |
| ; CHECK-NEXT: mov w9, #48 // =0x30 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #49, .LBB6_52 |
| ; CHECK-NEXT: .LBB6_115: // %cond.load193 |
| ; CHECK-NEXT: mov w9, #49 // =0x31 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #50, .LBB6_53 |
| ; CHECK-NEXT: .LBB6_116: // %cond.load197 |
| ; CHECK-NEXT: mov w9, #50 // =0x32 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #51, .LBB6_54 |
| ; CHECK-NEXT: .LBB6_117: // %cond.load201 |
| ; CHECK-NEXT: mov w9, #51 // =0x33 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #52, .LBB6_55 |
| ; CHECK-NEXT: .LBB6_118: // %cond.load205 |
| ; CHECK-NEXT: mov w9, #52 // =0x34 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #53, .LBB6_56 |
| ; CHECK-NEXT: .LBB6_119: // %cond.load209 |
| ; CHECK-NEXT: mov w9, #53 // =0x35 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #54, .LBB6_57 |
| ; CHECK-NEXT: .LBB6_120: // %cond.load213 |
| ; CHECK-NEXT: mov w9, #54 // =0x36 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #55, .LBB6_58 |
| ; CHECK-NEXT: .LBB6_121: // %cond.load217 |
| ; CHECK-NEXT: mov w9, #55 // =0x37 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #56, .LBB6_59 |
| ; CHECK-NEXT: .LBB6_122: // %cond.load221 |
| ; CHECK-NEXT: mov w9, #56 // =0x38 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #57, .LBB6_60 |
| ; CHECK-NEXT: .LBB6_123: // %cond.load225 |
| ; CHECK-NEXT: mov w9, #57 // =0x39 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #58, .LBB6_61 |
| ; CHECK-NEXT: .LBB6_124: // %cond.load229 |
| ; CHECK-NEXT: mov w9, #58 // =0x3a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #59, .LBB6_62 |
| ; CHECK-NEXT: .LBB6_125: // %cond.load233 |
| ; CHECK-NEXT: mov w9, #59 // =0x3b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #60, .LBB6_63 |
| ; CHECK-NEXT: .LBB6_126: // %cond.load237 |
| ; CHECK-NEXT: mov w9, #60 // =0x3c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #61, .LBB6_64 |
| ; CHECK-NEXT: .LBB6_127: // %cond.load241 |
| ; CHECK-NEXT: mov w9, #61 // =0x3d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbz x8, #62, .LBB6_65 |
| ; CHECK-NEXT: .LBB6_128: // %cond.load245 |
| ; CHECK-NEXT: mov w9, #62 // =0x3e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: ptrue p1.s |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; CHECK-NEXT: ldr s1, [x0], #4 |
| ; CHECK-NEXT: mov z0.s, p2/m, s1 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB6_66 |
| ; CHECK-NEXT: b .LBB6_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v64f32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl64 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <64 x float>, ptr %ap |
| %b = load <64 x float>, ptr %bp |
| %mask = fcmp oeq <64 x float> %a, %b |
| %load = call <64 x float> @llvm.masked.expandload.v64f32(ptr %ap, <64 x i1> %mask, <64 x float> zeroinitializer) |
| store <64 x float> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_v64i8(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_v64i8: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #128 |
| ; VBITS_GE_256-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 128 |
| ; VBITS_GE_256-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_256-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_256-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_256-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_256-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_256-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_256-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_256-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_256-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_256-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_256-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_256-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 |
| ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] |
| ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8] |
| ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b |
| ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[1] |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: umov w16, v0.b[3] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[4] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[7] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[8] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[6] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[9] |
| ; VBITS_GE_256-NEXT: mov z16.b, z0.b[17] |
| ; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[10] |
| ; VBITS_GE_256-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[11] |
| ; VBITS_GE_256-NEXT: umov w3, v0.b[12] |
| ; VBITS_GE_256-NEXT: mov z17.b, z0.b[18] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: mov z18.b, z0.b[19] |
| ; VBITS_GE_256-NEXT: fmov w19, s16 |
| ; VBITS_GE_256-NEXT: ubfiz w22, w14, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w23, w15, #8, #1 |
| ; VBITS_GE_256-NEXT: umov w4, v0.b[13] |
| ; VBITS_GE_256-NEXT: bfi w8, w16, #3, #1 |
| ; VBITS_GE_256-NEXT: mov z19.b, z0.b[20] |
| ; VBITS_GE_256-NEXT: mov z25.b, z0.b[30] |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #9, #1 |
| ; VBITS_GE_256-NEXT: umov w5, v0.b[14] |
| ; VBITS_GE_256-NEXT: mov z20.b, z0.b[21] |
| ; VBITS_GE_256-NEXT: bfi w8, w12, #4, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w18, #10, #1 |
| ; VBITS_GE_256-NEXT: orr w18, w22, w23 |
| ; VBITS_GE_256-NEXT: mov z21.b, z0.b[22] |
| ; VBITS_GE_256-NEXT: str w19, [sp, #16] // 4-byte Spill |
| ; VBITS_GE_256-NEXT: fmov w19, s17 |
| ; VBITS_GE_256-NEXT: bfi w8, w11, #5, #1 |
| ; VBITS_GE_256-NEXT: fmov w20, s18 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w1, #11, #1 |
| ; VBITS_GE_256-NEXT: orr w17, w18, w17 |
| ; VBITS_GE_256-NEXT: mov z22.b, z0.b[23] |
| ; VBITS_GE_256-NEXT: fmov w21, s19 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #6, #1 |
| ; VBITS_GE_256-NEXT: fmov w9, s24 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w3, #12, #1 |
| ; VBITS_GE_256-NEXT: orr w10, w17, w10 |
| ; VBITS_GE_256-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z7.b, z0.b[16] |
| ; VBITS_GE_256-NEXT: mov z3.b, z0.b[24] |
| ; VBITS_GE_256-NEXT: fmov w13, s20 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w4, #13, #1 |
| ; VBITS_GE_256-NEXT: str w9, [sp, #8] // 4-byte Spill |
| ; VBITS_GE_256-NEXT: fmov w9, s25 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w1 |
| ; VBITS_GE_256-NEXT: mov z4.b, z0.b[25] |
| ; VBITS_GE_256-NEXT: fmov w14, s21 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w5, #14, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w19, #18, #1 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w20, #19, #1 |
| ; VBITS_GE_256-NEXT: fmov w15, s22 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w18 |
| ; VBITS_GE_256-NEXT: ubfiz w24, w21, #20, #1 |
| ; VBITS_GE_256-NEXT: str w9, [sp, #20] // 4-byte Spill |
| ; VBITS_GE_256-NEXT: umov w9, v1.b[6] |
| ; VBITS_GE_256-NEXT: fmov w7, s7 |
| ; VBITS_GE_256-NEXT: orr w17, w10, w17 |
| ; VBITS_GE_256-NEXT: orr w10, w1, w16 |
| ; VBITS_GE_256-NEXT: fmov w22, s3 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #21, #1 |
| ; VBITS_GE_256-NEXT: umov w6, v0.b[15] |
| ; VBITS_GE_256-NEXT: fmov w21, s4 |
| ; VBITS_GE_256-NEXT: orr w24, w10, w24 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w14, #22, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #23, #1 |
| ; VBITS_GE_256-NEXT: orr w24, w24, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1 |
| ; VBITS_GE_256-NEXT: umov w7, v1.b[1] |
| ; VBITS_GE_256-NEXT: str w9, [sp, #12] // 4-byte Spill |
| ; VBITS_GE_256-NEXT: fmov w9, s1 |
| ; VBITS_GE_256-NEXT: orr w14, w24, w14 |
| ; VBITS_GE_256-NEXT: ubfiz w22, w22, #24, #1 |
| ; VBITS_GE_256-NEXT: mov z5.b, z0.b[26] |
| ; VBITS_GE_256-NEXT: orr w14, w14, w15 |
| ; VBITS_GE_256-NEXT: ubfiz w15, w21, #25, #1 |
| ; VBITS_GE_256-NEXT: mov z6.b, z0.b[27] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w6, #15, #1 |
| ; VBITS_GE_256-NEXT: umov w6, v1.b[2] |
| ; VBITS_GE_256-NEXT: orr w14, w14, w22 |
| ; VBITS_GE_256-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_256-NEXT: orr w15, w14, w15 |
| ; VBITS_GE_256-NEXT: and w14, w9, #0x1 |
| ; VBITS_GE_256-NEXT: fmov w3, s5 |
| ; VBITS_GE_256-NEXT: umov w4, v1.b[3] |
| ; VBITS_GE_256-NEXT: umov w28, v1.b[8] |
| ; VBITS_GE_256-NEXT: bfi w14, w7, #1, #1 |
| ; VBITS_GE_256-NEXT: fmov w18, s6 |
| ; VBITS_GE_256-NEXT: umov w26, v1.b[7] |
| ; VBITS_GE_256-NEXT: orr w12, w17, w12 |
| ; VBITS_GE_256-NEXT: fmov w17, s23 |
| ; VBITS_GE_256-NEXT: umov w1, v1.b[4] |
| ; VBITS_GE_256-NEXT: umov w29, v1.b[9] |
| ; VBITS_GE_256-NEXT: bfi w14, w6, #2, #1 |
| ; VBITS_GE_256-NEXT: orr w5, w12, w11 |
| ; VBITS_GE_256-NEXT: umov w30, v1.b[10] |
| ; VBITS_GE_256-NEXT: mov z7.b, z1.b[18] |
| ; VBITS_GE_256-NEXT: mov z16.b, z1.b[19] |
| ; VBITS_GE_256-NEXT: ldr w11, [sp, #16] // 4-byte Reload |
| ; VBITS_GE_256-NEXT: ubfiz w3, w3, #26, #1 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[5] |
| ; VBITS_GE_256-NEXT: umov w23, v1.b[11] |
| ; VBITS_GE_256-NEXT: mov z17.b, z1.b[20] |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #27, #1 |
| ; VBITS_GE_256-NEXT: bfi w14, w4, #3, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w7, w28, #8, #1 |
| ; VBITS_GE_256-NEXT: ldr w28, [sp, #8] // 4-byte Reload |
| ; VBITS_GE_256-NEXT: umov w27, v1.b[12] |
| ; VBITS_GE_256-NEXT: mov z18.b, z1.b[21] |
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #17, #1 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w3 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #28, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w26, #7, #1 |
| ; VBITS_GE_256-NEXT: umov w25, v1.b[13] |
| ; VBITS_GE_256-NEXT: mov z19.b, z1.b[22] |
| ; VBITS_GE_256-NEXT: fmov w24, s7 |
| ; VBITS_GE_256-NEXT: fmov w21, s16 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w18 |
| ; VBITS_GE_256-NEXT: ubfiz w28, w28, #29, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w29, w29, #9, #1 |
| ; VBITS_GE_256-NEXT: bfi w14, w1, #4, #1 |
| ; VBITS_GE_256-NEXT: mov z20.b, z1.b[23] |
| ; VBITS_GE_256-NEXT: fmov w22, s17 |
| ; VBITS_GE_256-NEXT: orr w11, w5, w11 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w17 |
| ; VBITS_GE_256-NEXT: orr w1, w4, w7 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w30, #10, #1 |
| ; VBITS_GE_256-NEXT: fmov w5, s18 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: orr w11, w15, w28 |
| ; VBITS_GE_256-NEXT: orr w15, w1, w29 |
| ; VBITS_GE_256-NEXT: bfi w14, w16, #5, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w23, #11, #1 |
| ; VBITS_GE_256-NEXT: mov z21.b, z1.b[24] |
| ; VBITS_GE_256-NEXT: mov z22.b, z1.b[25] |
| ; VBITS_GE_256-NEXT: fmov w12, s19 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w4 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w27, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w24, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w7, w21, #19, #1 |
| ; VBITS_GE_256-NEXT: umov w19, v1.b[14] |
| ; VBITS_GE_256-NEXT: fmov w9, s20 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w25, #13, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w21, w22, #20, #1 |
| ; VBITS_GE_256-NEXT: umov w20, v1.b[15] |
| ; VBITS_GE_256-NEXT: mov z23.b, z1.b[26] |
| ; VBITS_GE_256-NEXT: orr w15, w15, w1 |
| ; VBITS_GE_256-NEXT: orr w1, w4, w7 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w5, #21, #1 |
| ; VBITS_GE_256-NEXT: mov z24.b, z1.b[27] |
| ; VBITS_GE_256-NEXT: fmov w3, s21 |
| ; VBITS_GE_256-NEXT: fmov w18, s22 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w16 |
| ; VBITS_GE_256-NEXT: orr w16, w1, w21 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #22, #1 |
| ; VBITS_GE_256-NEXT: orr w16, w16, w4 |
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #23, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, z1.b[29] |
| ; VBITS_GE_256-NEXT: fmov w6, s23 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w19, #14, #1 |
| ; VBITS_GE_256-NEXT: orr w12, w16, w12 |
| ; VBITS_GE_256-NEXT: mov z5.b, z1.b[16] |
| ; VBITS_GE_256-NEXT: mov z25.b, z1.b[28] |
| ; VBITS_GE_256-NEXT: fmov w17, s24 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w20, #15, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w3, w3, #24, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w12, w9 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w18, #25, #1 |
| ; VBITS_GE_256-NEXT: ldr w18, [sp, #12] // 4-byte Reload |
| ; VBITS_GE_256-NEXT: mov z6.b, z1.b[17] |
| ; VBITS_GE_256-NEXT: orr w15, w15, w1 |
| ; VBITS_GE_256-NEXT: mov z4.b, z1.b[30] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w3 |
| ; VBITS_GE_256-NEXT: bfi w14, w18, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w15, w15, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w6, #26, #1 |
| ; VBITS_GE_256-NEXT: fmov w18, s3 |
| ; VBITS_GE_256-NEXT: fmov w10, s5 |
| ; VBITS_GE_256-NEXT: fmov w26, s25 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #27, #1 |
| ; VBITS_GE_256-NEXT: fmov w13, s6 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: ldr w12, [sp, #20] // 4-byte Reload |
| ; VBITS_GE_256-NEXT: mov z0.b, z0.b[31] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w18, #29, #1 |
| ; VBITS_GE_256-NEXT: fmov w18, s4 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #16, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w26, #28, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #17, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, z1.b[31] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #30, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.b |
| ; VBITS_GE_256-NEXT: orr w10, w15, w10 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w15, w18, #30, #1 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w10, w14, w10 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: fmov w11, s0 |
| ; VBITS_GE_256-NEXT: orr w9, w10, w9 |
| ; VBITS_GE_256-NEXT: fmov w10, s2 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11, lsl #31 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10, lsl #31 |
| ; VBITS_GE_256-NEXT: orr x8, x9, x8, lsl #32 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI7_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI7_0 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB7_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rb { z2.b }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: add x0, x0, #1 |
| ; VBITS_GE_256-NEXT: mov z0.d, z2.d |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB7_3 |
| ; VBITS_GE_256-NEXT: b .LBB7_4 |
| ; VBITS_GE_256-NEXT: .LBB7_2: |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB7_4 |
| ; VBITS_GE_256-NEXT: .LBB7_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB7_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB7_68 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB7_69 |
| ; VBITS_GE_256-NEXT: .LBB7_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB7_70 |
| ; VBITS_GE_256-NEXT: .LBB7_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB7_71 |
| ; VBITS_GE_256-NEXT: .LBB7_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB7_72 |
| ; VBITS_GE_256-NEXT: .LBB7_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB7_73 |
| ; VBITS_GE_256-NEXT: .LBB7_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB7_74 |
| ; VBITS_GE_256-NEXT: .LBB7_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB7_75 |
| ; VBITS_GE_256-NEXT: .LBB7_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB7_76 |
| ; VBITS_GE_256-NEXT: .LBB7_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB7_77 |
| ; VBITS_GE_256-NEXT: .LBB7_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB7_78 |
| ; VBITS_GE_256-NEXT: .LBB7_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB7_79 |
| ; VBITS_GE_256-NEXT: .LBB7_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB7_80 |
| ; VBITS_GE_256-NEXT: .LBB7_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB7_81 |
| ; VBITS_GE_256-NEXT: .LBB7_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB7_82 |
| ; VBITS_GE_256-NEXT: .LBB7_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB7_83 |
| ; VBITS_GE_256-NEXT: .LBB7_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB7_84 |
| ; VBITS_GE_256-NEXT: .LBB7_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB7_85 |
| ; VBITS_GE_256-NEXT: .LBB7_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB7_86 |
| ; VBITS_GE_256-NEXT: .LBB7_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB7_87 |
| ; VBITS_GE_256-NEXT: .LBB7_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB7_88 |
| ; VBITS_GE_256-NEXT: .LBB7_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB7_89 |
| ; VBITS_GE_256-NEXT: .LBB7_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB7_90 |
| ; VBITS_GE_256-NEXT: .LBB7_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB7_91 |
| ; VBITS_GE_256-NEXT: .LBB7_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB7_92 |
| ; VBITS_GE_256-NEXT: .LBB7_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB7_93 |
| ; VBITS_GE_256-NEXT: .LBB7_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB7_94 |
| ; VBITS_GE_256-NEXT: .LBB7_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB7_95 |
| ; VBITS_GE_256-NEXT: .LBB7_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB7_96 |
| ; VBITS_GE_256-NEXT: .LBB7_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB7_97 |
| ; VBITS_GE_256-NEXT: .LBB7_34: // %else122 |
| ; VBITS_GE_256-NEXT: tbnz x8, #32, .LBB7_98 |
| ; VBITS_GE_256-NEXT: .LBB7_35: // %else126 |
| ; VBITS_GE_256-NEXT: tbnz x8, #33, .LBB7_99 |
| ; VBITS_GE_256-NEXT: .LBB7_36: // %else130 |
| ; VBITS_GE_256-NEXT: tbnz x8, #34, .LBB7_100 |
| ; VBITS_GE_256-NEXT: .LBB7_37: // %else134 |
| ; VBITS_GE_256-NEXT: tbnz x8, #35, .LBB7_101 |
| ; VBITS_GE_256-NEXT: .LBB7_38: // %else138 |
| ; VBITS_GE_256-NEXT: tbnz x8, #36, .LBB7_102 |
| ; VBITS_GE_256-NEXT: .LBB7_39: // %else142 |
| ; VBITS_GE_256-NEXT: tbnz x8, #37, .LBB7_103 |
| ; VBITS_GE_256-NEXT: .LBB7_40: // %else146 |
| ; VBITS_GE_256-NEXT: tbnz x8, #38, .LBB7_104 |
| ; VBITS_GE_256-NEXT: .LBB7_41: // %else150 |
| ; VBITS_GE_256-NEXT: tbnz x8, #39, .LBB7_105 |
| ; VBITS_GE_256-NEXT: .LBB7_42: // %else154 |
| ; VBITS_GE_256-NEXT: tbnz x8, #40, .LBB7_106 |
| ; VBITS_GE_256-NEXT: .LBB7_43: // %else158 |
| ; VBITS_GE_256-NEXT: tbnz x8, #41, .LBB7_107 |
| ; VBITS_GE_256-NEXT: .LBB7_44: // %else162 |
| ; VBITS_GE_256-NEXT: tbnz x8, #42, .LBB7_108 |
| ; VBITS_GE_256-NEXT: .LBB7_45: // %else166 |
| ; VBITS_GE_256-NEXT: tbnz x8, #43, .LBB7_109 |
| ; VBITS_GE_256-NEXT: .LBB7_46: // %else170 |
| ; VBITS_GE_256-NEXT: tbnz x8, #44, .LBB7_110 |
| ; VBITS_GE_256-NEXT: .LBB7_47: // %else174 |
| ; VBITS_GE_256-NEXT: tbnz x8, #45, .LBB7_111 |
| ; VBITS_GE_256-NEXT: .LBB7_48: // %else178 |
| ; VBITS_GE_256-NEXT: tbnz x8, #46, .LBB7_112 |
| ; VBITS_GE_256-NEXT: .LBB7_49: // %else182 |
| ; VBITS_GE_256-NEXT: tbnz x8, #47, .LBB7_113 |
| ; VBITS_GE_256-NEXT: .LBB7_50: // %else186 |
| ; VBITS_GE_256-NEXT: tbnz x8, #48, .LBB7_114 |
| ; VBITS_GE_256-NEXT: .LBB7_51: // %else190 |
| ; VBITS_GE_256-NEXT: tbnz x8, #49, .LBB7_115 |
| ; VBITS_GE_256-NEXT: .LBB7_52: // %else194 |
| ; VBITS_GE_256-NEXT: tbnz x8, #50, .LBB7_116 |
| ; VBITS_GE_256-NEXT: .LBB7_53: // %else198 |
| ; VBITS_GE_256-NEXT: tbnz x8, #51, .LBB7_117 |
| ; VBITS_GE_256-NEXT: .LBB7_54: // %else202 |
| ; VBITS_GE_256-NEXT: tbnz x8, #52, .LBB7_118 |
| ; VBITS_GE_256-NEXT: .LBB7_55: // %else206 |
| ; VBITS_GE_256-NEXT: tbnz x8, #53, .LBB7_119 |
| ; VBITS_GE_256-NEXT: .LBB7_56: // %else210 |
| ; VBITS_GE_256-NEXT: tbnz x8, #54, .LBB7_120 |
| ; VBITS_GE_256-NEXT: .LBB7_57: // %else214 |
| ; VBITS_GE_256-NEXT: tbnz x8, #55, .LBB7_121 |
| ; VBITS_GE_256-NEXT: .LBB7_58: // %else218 |
| ; VBITS_GE_256-NEXT: tbnz x8, #56, .LBB7_122 |
| ; VBITS_GE_256-NEXT: .LBB7_59: // %else222 |
| ; VBITS_GE_256-NEXT: tbnz x8, #57, .LBB7_123 |
| ; VBITS_GE_256-NEXT: .LBB7_60: // %else226 |
| ; VBITS_GE_256-NEXT: tbnz x8, #58, .LBB7_124 |
| ; VBITS_GE_256-NEXT: .LBB7_61: // %else230 |
| ; VBITS_GE_256-NEXT: tbnz x8, #59, .LBB7_125 |
| ; VBITS_GE_256-NEXT: .LBB7_62: // %else234 |
| ; VBITS_GE_256-NEXT: tbnz x8, #60, .LBB7_126 |
| ; VBITS_GE_256-NEXT: .LBB7_63: // %else238 |
| ; VBITS_GE_256-NEXT: tbnz x8, #61, .LBB7_127 |
| ; VBITS_GE_256-NEXT: .LBB7_64: // %else242 |
| ; VBITS_GE_256-NEXT: tbnz x8, #62, .LBB7_128 |
| ; VBITS_GE_256-NEXT: .LBB7_65: // %else246 |
| ; VBITS_GE_256-NEXT: tbz x8, #63, .LBB7_67 |
| ; VBITS_GE_256-NEXT: .LBB7_66: // %cond.load249 |
| ; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w8 |
| ; VBITS_GE_256-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB7_67: // %else250 |
| ; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 |
| ; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2, x8] |
| ; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x2] |
| ; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: add sp, sp, #128 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB7_68: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB7_6 |
| ; VBITS_GE_256-NEXT: .LBB7_69: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB7_7 |
| ; VBITS_GE_256-NEXT: .LBB7_70: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB7_8 |
| ; VBITS_GE_256-NEXT: .LBB7_71: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB7_9 |
| ; VBITS_GE_256-NEXT: .LBB7_72: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB7_10 |
| ; VBITS_GE_256-NEXT: .LBB7_73: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB7_11 |
| ; VBITS_GE_256-NEXT: .LBB7_74: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB7_12 |
| ; VBITS_GE_256-NEXT: .LBB7_75: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB7_13 |
| ; VBITS_GE_256-NEXT: .LBB7_76: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB7_14 |
| ; VBITS_GE_256-NEXT: .LBB7_77: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB7_15 |
| ; VBITS_GE_256-NEXT: .LBB7_78: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB7_16 |
| ; VBITS_GE_256-NEXT: .LBB7_79: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB7_17 |
| ; VBITS_GE_256-NEXT: .LBB7_80: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB7_18 |
| ; VBITS_GE_256-NEXT: .LBB7_81: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB7_19 |
| ; VBITS_GE_256-NEXT: .LBB7_82: // %cond.load61 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB7_20 |
| ; VBITS_GE_256-NEXT: .LBB7_83: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB7_21 |
| ; VBITS_GE_256-NEXT: .LBB7_84: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB7_22 |
| ; VBITS_GE_256-NEXT: .LBB7_85: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB7_23 |
| ; VBITS_GE_256-NEXT: .LBB7_86: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB7_24 |
| ; VBITS_GE_256-NEXT: .LBB7_87: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB7_25 |
| ; VBITS_GE_256-NEXT: .LBB7_88: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB7_26 |
| ; VBITS_GE_256-NEXT: .LBB7_89: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB7_27 |
| ; VBITS_GE_256-NEXT: .LBB7_90: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB7_28 |
| ; VBITS_GE_256-NEXT: .LBB7_91: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB7_29 |
| ; VBITS_GE_256-NEXT: .LBB7_92: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB7_30 |
| ; VBITS_GE_256-NEXT: .LBB7_93: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB7_31 |
| ; VBITS_GE_256-NEXT: .LBB7_94: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB7_32 |
| ; VBITS_GE_256-NEXT: .LBB7_95: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB7_33 |
| ; VBITS_GE_256-NEXT: .LBB7_96: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB7_34 |
| ; VBITS_GE_256-NEXT: .LBB7_97: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w9, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #32, .LBB7_35 |
| ; VBITS_GE_256-NEXT: .LBB7_98: // %cond.load125 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: ptrue p2.b, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #33, .LBB7_36 |
| ; VBITS_GE_256-NEXT: .LBB7_99: // %cond.load129 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #34, .LBB7_37 |
| ; VBITS_GE_256-NEXT: .LBB7_100: // %cond.load133 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #35, .LBB7_38 |
| ; VBITS_GE_256-NEXT: .LBB7_101: // %cond.load137 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #36, .LBB7_39 |
| ; VBITS_GE_256-NEXT: .LBB7_102: // %cond.load141 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #37, .LBB7_40 |
| ; VBITS_GE_256-NEXT: .LBB7_103: // %cond.load145 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #38, .LBB7_41 |
| ; VBITS_GE_256-NEXT: .LBB7_104: // %cond.load149 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #39, .LBB7_42 |
| ; VBITS_GE_256-NEXT: .LBB7_105: // %cond.load153 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #40, .LBB7_43 |
| ; VBITS_GE_256-NEXT: .LBB7_106: // %cond.load157 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #41, .LBB7_44 |
| ; VBITS_GE_256-NEXT: .LBB7_107: // %cond.load161 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #42, .LBB7_45 |
| ; VBITS_GE_256-NEXT: .LBB7_108: // %cond.load165 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #43, .LBB7_46 |
| ; VBITS_GE_256-NEXT: .LBB7_109: // %cond.load169 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #44, .LBB7_47 |
| ; VBITS_GE_256-NEXT: .LBB7_110: // %cond.load173 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #45, .LBB7_48 |
| ; VBITS_GE_256-NEXT: .LBB7_111: // %cond.load177 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #46, .LBB7_49 |
| ; VBITS_GE_256-NEXT: .LBB7_112: // %cond.load181 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #47, .LBB7_50 |
| ; VBITS_GE_256-NEXT: .LBB7_113: // %cond.load185 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #48, .LBB7_51 |
| ; VBITS_GE_256-NEXT: .LBB7_114: // %cond.load189 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #49, .LBB7_52 |
| ; VBITS_GE_256-NEXT: .LBB7_115: // %cond.load193 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #50, .LBB7_53 |
| ; VBITS_GE_256-NEXT: .LBB7_116: // %cond.load197 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #51, .LBB7_54 |
| ; VBITS_GE_256-NEXT: .LBB7_117: // %cond.load201 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #52, .LBB7_55 |
| ; VBITS_GE_256-NEXT: .LBB7_118: // %cond.load205 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #53, .LBB7_56 |
| ; VBITS_GE_256-NEXT: .LBB7_119: // %cond.load209 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #54, .LBB7_57 |
| ; VBITS_GE_256-NEXT: .LBB7_120: // %cond.load213 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #55, .LBB7_58 |
| ; VBITS_GE_256-NEXT: .LBB7_121: // %cond.load217 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #56, .LBB7_59 |
| ; VBITS_GE_256-NEXT: .LBB7_122: // %cond.load221 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #57, .LBB7_60 |
| ; VBITS_GE_256-NEXT: .LBB7_123: // %cond.load225 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #58, .LBB7_61 |
| ; VBITS_GE_256-NEXT: .LBB7_124: // %cond.load229 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #59, .LBB7_62 |
| ; VBITS_GE_256-NEXT: .LBB7_125: // %cond.load233 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #60, .LBB7_63 |
| ; VBITS_GE_256-NEXT: .LBB7_126: // %cond.load237 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #61, .LBB7_64 |
| ; VBITS_GE_256-NEXT: .LBB7_127: // %cond.load241 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz x8, #62, .LBB7_65 |
| ; VBITS_GE_256-NEXT: .LBB7_128: // %cond.load245 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z2.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b |
| ; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz x8, #63, .LBB7_66 |
| ; VBITS_GE_256-NEXT: b .LBB7_67 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_v64i8: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p0.b, vl64 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.b |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[1] |
| ; VBITS_GE_512-NEXT: fmov w22, s0 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: and x22, x22, #0x1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[10] |
| ; VBITS_GE_512-NEXT: bfi x22, x11, #1, #1 |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[11] |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: bfi x22, x12, #2, #1 |
| ; VBITS_GE_512-NEXT: fmov w19, s3 |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: ubfiz x14, x14, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x1, x1, #8, #1 |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[12] |
| ; VBITS_GE_512-NEXT: bfi x22, x13, #3, #1 |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: ubfiz x16, x16, #9, #1 |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[13] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w23, s4 |
| ; VBITS_GE_512-NEXT: orr x14, x14, x1 |
| ; VBITS_GE_512-NEXT: bfi x22, x10, #4, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x10, x17, #10, #1 |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w24, s7 |
| ; VBITS_GE_512-NEXT: ubfiz x13, x19, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x19, x20, #19, #1 |
| ; VBITS_GE_512-NEXT: orr x14, x14, x16 |
| ; VBITS_GE_512-NEXT: ubfiz x16, x3, #11, #1 |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[14] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w25, s16 |
| ; VBITS_GE_512-NEXT: ubfiz x1, x21, #20, #1 |
| ; VBITS_GE_512-NEXT: orr x10, x14, x10 |
| ; VBITS_GE_512-NEXT: bfi x22, x9, #5, #1 |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: fmov w26, s17 |
| ; VBITS_GE_512-NEXT: orr x13, x13, x19 |
| ; VBITS_GE_512-NEXT: ubfiz x9, x4, #12, #1 |
| ; VBITS_GE_512-NEXT: orr x10, x10, x16 |
| ; VBITS_GE_512-NEXT: ubfiz x16, x23, #21, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w11, s18 |
| ; VBITS_GE_512-NEXT: orr x13, x13, x1 |
| ; VBITS_GE_512-NEXT: ubfiz x14, x5, #13, #1 |
| ; VBITS_GE_512-NEXT: bfi x22, x8, #6, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x8, x24, #22, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: fmov w27, s19 |
| ; VBITS_GE_512-NEXT: orr x9, x10, x9 |
| ; VBITS_GE_512-NEXT: orr x10, x13, x16 |
| ; VBITS_GE_512-NEXT: ubfiz x13, x25, #23, #1 |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: fmov w28, s20 |
| ; VBITS_GE_512-NEXT: orr x9, x9, x14 |
| ; VBITS_GE_512-NEXT: orr x8, x10, x8 |
| ; VBITS_GE_512-NEXT: ubfiz x10, x15, #14, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x14, x26, #24, #1 |
| ; VBITS_GE_512-NEXT: fmov w6, s1 |
| ; VBITS_GE_512-NEXT: fmov w29, s21 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x13 |
| ; VBITS_GE_512-NEXT: ubfiz x11, x11, #25, #1 |
| ; VBITS_GE_512-NEXT: fmov w7, s2 |
| ; VBITS_GE_512-NEXT: fmov w30, s22 |
| ; VBITS_GE_512-NEXT: ubfiz x13, x18, #15, #1 |
| ; VBITS_GE_512-NEXT: orr x9, x9, x10 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x14 |
| ; VBITS_GE_512-NEXT: ubfiz x10, x27, #26, #1 |
| ; VBITS_GE_512-NEXT: fmov w12, s5 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x11 |
| ; VBITS_GE_512-NEXT: ubfiz x11, x28, #27, #1 |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: orr x9, x9, x13 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x10 |
| ; VBITS_GE_512-NEXT: ubfiz x10, x6, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x13, x29, #28, #1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x11 |
| ; VBITS_GE_512-NEXT: ubfiz x11, x7, #17, #1 |
| ; VBITS_GE_512-NEXT: ubfiz x14, x30, #29, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[32] |
| ; VBITS_GE_512-NEXT: orr x9, x9, x10 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x13 |
| ; VBITS_GE_512-NEXT: ubfiz x10, x12, #30, #1 |
| ; VBITS_GE_512-NEXT: fmov w12, s3 |
| ; VBITS_GE_512-NEXT: orr x9, x9, x11 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x14 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[33] |
| ; VBITS_GE_512-NEXT: orr x9, x22, x9 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x10 |
| ; VBITS_GE_512-NEXT: orr x8, x9, x8 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: lsl w10, w12, #31 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[34] |
| ; VBITS_GE_512-NEXT: orr x8, x8, x10 |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #32 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[35] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #33 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[36] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #34 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[37] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #35 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[38] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #36 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[39] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #37 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[40] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #38 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[41] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #39 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[42] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #40 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[43] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #41 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[44] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #42 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[45] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #43 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[46] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #44 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[47] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #45 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[48] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #46 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[49] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #47 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[50] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #48 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[51] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #49 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[52] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #50 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[53] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #51 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[54] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #52 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[55] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #53 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[56] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #54 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[57] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #55 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[58] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #56 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[59] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #57 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[60] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #58 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[61] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: fmov w10, s1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #59 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[62] |
| ; VBITS_GE_512-NEXT: mov z0.b, z0.b[63] |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #60 |
| ; VBITS_GE_512-NEXT: and w9, w10, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #61 |
| ; VBITS_GE_512-NEXT: fmov w9, s2 |
| ; VBITS_GE_512-NEXT: and w9, w9, #0x1 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #62 |
| ; VBITS_GE_512-NEXT: fmov w9, s0 |
| ; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #63 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB7_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB7_3 |
| ; VBITS_GE_512-NEXT: b .LBB7_4 |
| ; VBITS_GE_512-NEXT: .LBB7_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI7_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI7_0 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB7_4 |
| ; VBITS_GE_512-NEXT: .LBB7_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB7_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB7_68 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB7_69 |
| ; VBITS_GE_512-NEXT: .LBB7_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB7_70 |
| ; VBITS_GE_512-NEXT: .LBB7_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB7_71 |
| ; VBITS_GE_512-NEXT: .LBB7_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB7_72 |
| ; VBITS_GE_512-NEXT: .LBB7_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB7_73 |
| ; VBITS_GE_512-NEXT: .LBB7_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB7_74 |
| ; VBITS_GE_512-NEXT: .LBB7_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB7_75 |
| ; VBITS_GE_512-NEXT: .LBB7_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB7_76 |
| ; VBITS_GE_512-NEXT: .LBB7_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB7_77 |
| ; VBITS_GE_512-NEXT: .LBB7_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB7_78 |
| ; VBITS_GE_512-NEXT: .LBB7_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB7_79 |
| ; VBITS_GE_512-NEXT: .LBB7_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB7_80 |
| ; VBITS_GE_512-NEXT: .LBB7_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB7_81 |
| ; VBITS_GE_512-NEXT: .LBB7_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB7_82 |
| ; VBITS_GE_512-NEXT: .LBB7_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB7_83 |
| ; VBITS_GE_512-NEXT: .LBB7_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB7_84 |
| ; VBITS_GE_512-NEXT: .LBB7_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB7_85 |
| ; VBITS_GE_512-NEXT: .LBB7_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB7_86 |
| ; VBITS_GE_512-NEXT: .LBB7_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB7_87 |
| ; VBITS_GE_512-NEXT: .LBB7_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB7_88 |
| ; VBITS_GE_512-NEXT: .LBB7_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB7_89 |
| ; VBITS_GE_512-NEXT: .LBB7_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB7_90 |
| ; VBITS_GE_512-NEXT: .LBB7_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB7_91 |
| ; VBITS_GE_512-NEXT: .LBB7_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB7_92 |
| ; VBITS_GE_512-NEXT: .LBB7_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB7_93 |
| ; VBITS_GE_512-NEXT: .LBB7_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB7_94 |
| ; VBITS_GE_512-NEXT: .LBB7_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB7_95 |
| ; VBITS_GE_512-NEXT: .LBB7_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB7_96 |
| ; VBITS_GE_512-NEXT: .LBB7_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB7_97 |
| ; VBITS_GE_512-NEXT: .LBB7_34: // %else122 |
| ; VBITS_GE_512-NEXT: tbnz x8, #32, .LBB7_98 |
| ; VBITS_GE_512-NEXT: .LBB7_35: // %else126 |
| ; VBITS_GE_512-NEXT: tbnz x8, #33, .LBB7_99 |
| ; VBITS_GE_512-NEXT: .LBB7_36: // %else130 |
| ; VBITS_GE_512-NEXT: tbnz x8, #34, .LBB7_100 |
| ; VBITS_GE_512-NEXT: .LBB7_37: // %else134 |
| ; VBITS_GE_512-NEXT: tbnz x8, #35, .LBB7_101 |
| ; VBITS_GE_512-NEXT: .LBB7_38: // %else138 |
| ; VBITS_GE_512-NEXT: tbnz x8, #36, .LBB7_102 |
| ; VBITS_GE_512-NEXT: .LBB7_39: // %else142 |
| ; VBITS_GE_512-NEXT: tbnz x8, #37, .LBB7_103 |
| ; VBITS_GE_512-NEXT: .LBB7_40: // %else146 |
| ; VBITS_GE_512-NEXT: tbnz x8, #38, .LBB7_104 |
| ; VBITS_GE_512-NEXT: .LBB7_41: // %else150 |
| ; VBITS_GE_512-NEXT: tbnz x8, #39, .LBB7_105 |
| ; VBITS_GE_512-NEXT: .LBB7_42: // %else154 |
| ; VBITS_GE_512-NEXT: tbnz x8, #40, .LBB7_106 |
| ; VBITS_GE_512-NEXT: .LBB7_43: // %else158 |
| ; VBITS_GE_512-NEXT: tbnz x8, #41, .LBB7_107 |
| ; VBITS_GE_512-NEXT: .LBB7_44: // %else162 |
| ; VBITS_GE_512-NEXT: tbnz x8, #42, .LBB7_108 |
| ; VBITS_GE_512-NEXT: .LBB7_45: // %else166 |
| ; VBITS_GE_512-NEXT: tbnz x8, #43, .LBB7_109 |
| ; VBITS_GE_512-NEXT: .LBB7_46: // %else170 |
| ; VBITS_GE_512-NEXT: tbnz x8, #44, .LBB7_110 |
| ; VBITS_GE_512-NEXT: .LBB7_47: // %else174 |
| ; VBITS_GE_512-NEXT: tbnz x8, #45, .LBB7_111 |
| ; VBITS_GE_512-NEXT: .LBB7_48: // %else178 |
| ; VBITS_GE_512-NEXT: tbnz x8, #46, .LBB7_112 |
| ; VBITS_GE_512-NEXT: .LBB7_49: // %else182 |
| ; VBITS_GE_512-NEXT: tbnz x8, #47, .LBB7_113 |
| ; VBITS_GE_512-NEXT: .LBB7_50: // %else186 |
| ; VBITS_GE_512-NEXT: tbnz x8, #48, .LBB7_114 |
| ; VBITS_GE_512-NEXT: .LBB7_51: // %else190 |
| ; VBITS_GE_512-NEXT: tbnz x8, #49, .LBB7_115 |
| ; VBITS_GE_512-NEXT: .LBB7_52: // %else194 |
| ; VBITS_GE_512-NEXT: tbnz x8, #50, .LBB7_116 |
| ; VBITS_GE_512-NEXT: .LBB7_53: // %else198 |
| ; VBITS_GE_512-NEXT: tbnz x8, #51, .LBB7_117 |
| ; VBITS_GE_512-NEXT: .LBB7_54: // %else202 |
| ; VBITS_GE_512-NEXT: tbnz x8, #52, .LBB7_118 |
| ; VBITS_GE_512-NEXT: .LBB7_55: // %else206 |
| ; VBITS_GE_512-NEXT: tbnz x8, #53, .LBB7_119 |
| ; VBITS_GE_512-NEXT: .LBB7_56: // %else210 |
| ; VBITS_GE_512-NEXT: tbnz x8, #54, .LBB7_120 |
| ; VBITS_GE_512-NEXT: .LBB7_57: // %else214 |
| ; VBITS_GE_512-NEXT: tbnz x8, #55, .LBB7_121 |
| ; VBITS_GE_512-NEXT: .LBB7_58: // %else218 |
| ; VBITS_GE_512-NEXT: tbnz x8, #56, .LBB7_122 |
| ; VBITS_GE_512-NEXT: .LBB7_59: // %else222 |
| ; VBITS_GE_512-NEXT: tbnz x8, #57, .LBB7_123 |
| ; VBITS_GE_512-NEXT: .LBB7_60: // %else226 |
| ; VBITS_GE_512-NEXT: tbnz x8, #58, .LBB7_124 |
| ; VBITS_GE_512-NEXT: .LBB7_61: // %else230 |
| ; VBITS_GE_512-NEXT: tbnz x8, #59, .LBB7_125 |
| ; VBITS_GE_512-NEXT: .LBB7_62: // %else234 |
| ; VBITS_GE_512-NEXT: tbnz x8, #60, .LBB7_126 |
| ; VBITS_GE_512-NEXT: .LBB7_63: // %else238 |
| ; VBITS_GE_512-NEXT: tbnz x8, #61, .LBB7_127 |
| ; VBITS_GE_512-NEXT: .LBB7_64: // %else242 |
| ; VBITS_GE_512-NEXT: tbnz x8, #62, .LBB7_128 |
| ; VBITS_GE_512-NEXT: .LBB7_65: // %else246 |
| ; VBITS_GE_512-NEXT: tbz x8, #63, .LBB7_67 |
| ; VBITS_GE_512-NEXT: .LBB7_66: // %cond.load249 |
| ; VBITS_GE_512-NEXT: mov w8, #63 // =0x3f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w8 |
| ; VBITS_GE_512-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB7_67: // %else250 |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB7_68: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB7_6 |
| ; VBITS_GE_512-NEXT: .LBB7_69: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB7_7 |
| ; VBITS_GE_512-NEXT: .LBB7_70: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB7_8 |
| ; VBITS_GE_512-NEXT: .LBB7_71: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB7_9 |
| ; VBITS_GE_512-NEXT: .LBB7_72: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB7_10 |
| ; VBITS_GE_512-NEXT: .LBB7_73: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB7_11 |
| ; VBITS_GE_512-NEXT: .LBB7_74: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB7_12 |
| ; VBITS_GE_512-NEXT: .LBB7_75: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB7_13 |
| ; VBITS_GE_512-NEXT: .LBB7_76: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB7_14 |
| ; VBITS_GE_512-NEXT: .LBB7_77: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB7_15 |
| ; VBITS_GE_512-NEXT: .LBB7_78: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB7_16 |
| ; VBITS_GE_512-NEXT: .LBB7_79: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB7_17 |
| ; VBITS_GE_512-NEXT: .LBB7_80: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB7_18 |
| ; VBITS_GE_512-NEXT: .LBB7_81: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB7_19 |
| ; VBITS_GE_512-NEXT: .LBB7_82: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB7_20 |
| ; VBITS_GE_512-NEXT: .LBB7_83: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB7_21 |
| ; VBITS_GE_512-NEXT: .LBB7_84: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB7_22 |
| ; VBITS_GE_512-NEXT: .LBB7_85: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB7_23 |
| ; VBITS_GE_512-NEXT: .LBB7_86: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB7_24 |
| ; VBITS_GE_512-NEXT: .LBB7_87: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB7_25 |
| ; VBITS_GE_512-NEXT: .LBB7_88: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB7_26 |
| ; VBITS_GE_512-NEXT: .LBB7_89: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB7_27 |
| ; VBITS_GE_512-NEXT: .LBB7_90: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB7_28 |
| ; VBITS_GE_512-NEXT: .LBB7_91: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB7_29 |
| ; VBITS_GE_512-NEXT: .LBB7_92: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB7_30 |
| ; VBITS_GE_512-NEXT: .LBB7_93: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB7_31 |
| ; VBITS_GE_512-NEXT: .LBB7_94: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB7_32 |
| ; VBITS_GE_512-NEXT: .LBB7_95: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB7_33 |
| ; VBITS_GE_512-NEXT: .LBB7_96: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB7_34 |
| ; VBITS_GE_512-NEXT: .LBB7_97: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w9, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #32, .LBB7_35 |
| ; VBITS_GE_512-NEXT: .LBB7_98: // %cond.load125 |
| ; VBITS_GE_512-NEXT: mov w9, #32 // =0x20 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #33, .LBB7_36 |
| ; VBITS_GE_512-NEXT: .LBB7_99: // %cond.load129 |
| ; VBITS_GE_512-NEXT: mov w9, #33 // =0x21 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #34, .LBB7_37 |
| ; VBITS_GE_512-NEXT: .LBB7_100: // %cond.load133 |
| ; VBITS_GE_512-NEXT: mov w9, #34 // =0x22 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #35, .LBB7_38 |
| ; VBITS_GE_512-NEXT: .LBB7_101: // %cond.load137 |
| ; VBITS_GE_512-NEXT: mov w9, #35 // =0x23 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #36, .LBB7_39 |
| ; VBITS_GE_512-NEXT: .LBB7_102: // %cond.load141 |
| ; VBITS_GE_512-NEXT: mov w9, #36 // =0x24 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #37, .LBB7_40 |
| ; VBITS_GE_512-NEXT: .LBB7_103: // %cond.load145 |
| ; VBITS_GE_512-NEXT: mov w9, #37 // =0x25 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #38, .LBB7_41 |
| ; VBITS_GE_512-NEXT: .LBB7_104: // %cond.load149 |
| ; VBITS_GE_512-NEXT: mov w9, #38 // =0x26 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #39, .LBB7_42 |
| ; VBITS_GE_512-NEXT: .LBB7_105: // %cond.load153 |
| ; VBITS_GE_512-NEXT: mov w9, #39 // =0x27 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #40, .LBB7_43 |
| ; VBITS_GE_512-NEXT: .LBB7_106: // %cond.load157 |
| ; VBITS_GE_512-NEXT: mov w9, #40 // =0x28 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #41, .LBB7_44 |
| ; VBITS_GE_512-NEXT: .LBB7_107: // %cond.load161 |
| ; VBITS_GE_512-NEXT: mov w9, #41 // =0x29 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #42, .LBB7_45 |
| ; VBITS_GE_512-NEXT: .LBB7_108: // %cond.load165 |
| ; VBITS_GE_512-NEXT: mov w9, #42 // =0x2a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #43, .LBB7_46 |
| ; VBITS_GE_512-NEXT: .LBB7_109: // %cond.load169 |
| ; VBITS_GE_512-NEXT: mov w9, #43 // =0x2b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #44, .LBB7_47 |
| ; VBITS_GE_512-NEXT: .LBB7_110: // %cond.load173 |
| ; VBITS_GE_512-NEXT: mov w9, #44 // =0x2c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #45, .LBB7_48 |
| ; VBITS_GE_512-NEXT: .LBB7_111: // %cond.load177 |
| ; VBITS_GE_512-NEXT: mov w9, #45 // =0x2d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #46, .LBB7_49 |
| ; VBITS_GE_512-NEXT: .LBB7_112: // %cond.load181 |
| ; VBITS_GE_512-NEXT: mov w9, #46 // =0x2e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #47, .LBB7_50 |
| ; VBITS_GE_512-NEXT: .LBB7_113: // %cond.load185 |
| ; VBITS_GE_512-NEXT: mov w9, #47 // =0x2f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #48, .LBB7_51 |
| ; VBITS_GE_512-NEXT: .LBB7_114: // %cond.load189 |
| ; VBITS_GE_512-NEXT: mov w9, #48 // =0x30 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #49, .LBB7_52 |
| ; VBITS_GE_512-NEXT: .LBB7_115: // %cond.load193 |
| ; VBITS_GE_512-NEXT: mov w9, #49 // =0x31 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #50, .LBB7_53 |
| ; VBITS_GE_512-NEXT: .LBB7_116: // %cond.load197 |
| ; VBITS_GE_512-NEXT: mov w9, #50 // =0x32 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #51, .LBB7_54 |
| ; VBITS_GE_512-NEXT: .LBB7_117: // %cond.load201 |
| ; VBITS_GE_512-NEXT: mov w9, #51 // =0x33 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #52, .LBB7_55 |
| ; VBITS_GE_512-NEXT: .LBB7_118: // %cond.load205 |
| ; VBITS_GE_512-NEXT: mov w9, #52 // =0x34 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #53, .LBB7_56 |
| ; VBITS_GE_512-NEXT: .LBB7_119: // %cond.load209 |
| ; VBITS_GE_512-NEXT: mov w9, #53 // =0x35 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #54, .LBB7_57 |
| ; VBITS_GE_512-NEXT: .LBB7_120: // %cond.load213 |
| ; VBITS_GE_512-NEXT: mov w9, #54 // =0x36 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #55, .LBB7_58 |
| ; VBITS_GE_512-NEXT: .LBB7_121: // %cond.load217 |
| ; VBITS_GE_512-NEXT: mov w9, #55 // =0x37 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #56, .LBB7_59 |
| ; VBITS_GE_512-NEXT: .LBB7_122: // %cond.load221 |
| ; VBITS_GE_512-NEXT: mov w9, #56 // =0x38 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #57, .LBB7_60 |
| ; VBITS_GE_512-NEXT: .LBB7_123: // %cond.load225 |
| ; VBITS_GE_512-NEXT: mov w9, #57 // =0x39 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #58, .LBB7_61 |
| ; VBITS_GE_512-NEXT: .LBB7_124: // %cond.load229 |
| ; VBITS_GE_512-NEXT: mov w9, #58 // =0x3a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #59, .LBB7_62 |
| ; VBITS_GE_512-NEXT: .LBB7_125: // %cond.load233 |
| ; VBITS_GE_512-NEXT: mov w9, #59 // =0x3b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #60, .LBB7_63 |
| ; VBITS_GE_512-NEXT: .LBB7_126: // %cond.load237 |
| ; VBITS_GE_512-NEXT: mov w9, #60 // =0x3c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #61, .LBB7_64 |
| ; VBITS_GE_512-NEXT: .LBB7_127: // %cond.load241 |
| ; VBITS_GE_512-NEXT: mov w9, #61 // =0x3d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz x8, #62, .LBB7_65 |
| ; VBITS_GE_512-NEXT: .LBB7_128: // %cond.load245 |
| ; VBITS_GE_512-NEXT: mov w9, #62 // =0x3e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz x8, #63, .LBB7_66 |
| ; VBITS_GE_512-NEXT: b .LBB7_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v64i8: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #96 |
| ; CHECK-EXPAND-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-EXPAND-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-EXPAND-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-EXPAND-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-EXPAND-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 96 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w19, -8 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w20, -16 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w21, -24 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w22, -32 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w23, -40 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w24, -48 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w25, -56 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w26, -64 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w27, -72 |
| ; CHECK-EXPAND-NEXT: .cfi_offset w28, -80 |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl32 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1b { z1.b }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b |
| ; CHECK-EXPAND-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: umov w13, v0.b[1] |
| ; CHECK-EXPAND-NEXT: fmov w7, s0 |
| ; CHECK-EXPAND-NEXT: umov w12, v0.b[2] |
| ; CHECK-EXPAND-NEXT: umov w3, v0.b[7] |
| ; CHECK-EXPAND-NEXT: umov w5, v0.b[8] |
| ; CHECK-EXPAND-NEXT: umov w6, v0.b[9] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.b[3] |
| ; CHECK-EXPAND-NEXT: umov w17, v0.b[10] |
| ; CHECK-EXPAND-NEXT: umov w18, v0.b[11] |
| ; CHECK-EXPAND-NEXT: and w7, w7, #0x1 |
| ; CHECK-EXPAND-NEXT: mov z6.b, z0.b[18] |
| ; CHECK-EXPAND-NEXT: mov z7.b, z0.b[19] |
| ; CHECK-EXPAND-NEXT: bfi w7, w13, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[4] |
| ; CHECK-EXPAND-NEXT: mov z16.b, z0.b[20] |
| ; CHECK-EXPAND-NEXT: ubfiz w13, w3, #7, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w3, w5, #8, #1 |
| ; CHECK-EXPAND-NEXT: umov w4, v0.b[12] |
| ; CHECK-EXPAND-NEXT: bfi w7, w12, #2, #1 |
| ; CHECK-EXPAND-NEXT: mov z17.b, z0.b[21] |
| ; CHECK-EXPAND-NEXT: ubfiz w6, w6, #9, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[5] |
| ; CHECK-EXPAND-NEXT: umov w15, v0.b[13] |
| ; CHECK-EXPAND-NEXT: mov z18.b, z0.b[22] |
| ; CHECK-EXPAND-NEXT: fmov w21, s6 |
| ; CHECK-EXPAND-NEXT: fmov w22, s7 |
| ; CHECK-EXPAND-NEXT: orr w12, w13, w3 |
| ; CHECK-EXPAND-NEXT: ubfiz w17, w17, #10, #1 |
| ; CHECK-EXPAND-NEXT: bfi w7, w11, #3, #1 |
| ; CHECK-EXPAND-NEXT: mov z19.b, z0.b[23] |
| ; CHECK-EXPAND-NEXT: fmov w23, s16 |
| ; CHECK-EXPAND-NEXT: orr w12, w12, w6 |
| ; CHECK-EXPAND-NEXT: ubfiz w18, w18, #11, #1 |
| ; CHECK-EXPAND-NEXT: mov z20.b, z0.b[24] |
| ; CHECK-EXPAND-NEXT: fmov w24, s17 |
| ; CHECK-EXPAND-NEXT: orr w12, w12, w17 |
| ; CHECK-EXPAND-NEXT: bfi w7, w10, #4, #1 |
| ; CHECK-EXPAND-NEXT: umov w16, v0.b[14] |
| ; CHECK-EXPAND-NEXT: mov z21.b, z0.b[25] |
| ; CHECK-EXPAND-NEXT: fmov w25, s18 |
| ; CHECK-EXPAND-NEXT: ubfiz w3, w4, #12, #1 |
| ; CHECK-EXPAND-NEXT: orr w10, w12, w18 |
| ; CHECK-EXPAND-NEXT: ubfiz w12, w21, #18, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w18, w22, #19, #1 |
| ; CHECK-EXPAND-NEXT: umov w14, v0.b[15] |
| ; CHECK-EXPAND-NEXT: mov z22.b, z0.b[26] |
| ; CHECK-EXPAND-NEXT: fmov w26, s19 |
| ; CHECK-EXPAND-NEXT: bfi w7, w9, #5, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w9, w15, #13, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w15, w23, #20, #1 |
| ; CHECK-EXPAND-NEXT: mov z23.b, z0.b[27] |
| ; CHECK-EXPAND-NEXT: fmov w5, s20 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w3 |
| ; CHECK-EXPAND-NEXT: orr w12, w12, w18 |
| ; CHECK-EXPAND-NEXT: ubfiz w18, w24, #21, #1 |
| ; CHECK-EXPAND-NEXT: umov w8, v0.b[6] |
| ; CHECK-EXPAND-NEXT: fmov w27, s21 |
| ; CHECK-EXPAND-NEXT: orr w9, w10, w9 |
| ; CHECK-EXPAND-NEXT: orr w10, w12, w15 |
| ; CHECK-EXPAND-NEXT: ubfiz w12, w25, #22, #1 |
| ; CHECK-EXPAND-NEXT: fmov w28, s22 |
| ; CHECK-EXPAND-NEXT: ubfiz w16, w16, #14, #1 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w18 |
| ; CHECK-EXPAND-NEXT: ubfiz w15, w26, #23, #1 |
| ; CHECK-EXPAND-NEXT: mov z4.b, z0.b[16] |
| ; CHECK-EXPAND-NEXT: mov z24.b, z0.b[28] |
| ; CHECK-EXPAND-NEXT: fmov w13, s23 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w12 |
| ; CHECK-EXPAND-NEXT: ubfiz w12, w14, #15, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w14, w5, #24, #1 |
| ; CHECK-EXPAND-NEXT: mov z5.b, z0.b[17] |
| ; CHECK-EXPAND-NEXT: mov z3.b, z0.b[29] |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w16 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w15 |
| ; CHECK-EXPAND-NEXT: ubfiz w16, w27, #25, #1 |
| ; CHECK-EXPAND-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-EXPAND-NEXT: bfi w7, w8, #6, #1 |
| ; CHECK-EXPAND-NEXT: orr w8, w9, w12 |
| ; CHECK-EXPAND-NEXT: orr w9, w10, w14 |
| ; CHECK-EXPAND-NEXT: ubfiz w10, w28, #26, #1 |
| ; CHECK-EXPAND-NEXT: fmov w19, s4 |
| ; CHECK-EXPAND-NEXT: fmov w17, s24 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w16 |
| ; CHECK-EXPAND-NEXT: ubfiz w13, w13, #27, #1 |
| ; CHECK-EXPAND-NEXT: fmov w20, s5 |
| ; CHECK-EXPAND-NEXT: fmov w12, s3 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w10 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w13 |
| ; CHECK-EXPAND-NEXT: fmov w13, s2 |
| ; CHECK-EXPAND-NEXT: ubfiz w15, w19, #16, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w14, w17, #28, #1 |
| ; CHECK-EXPAND-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-EXPAND-NEXT: mov w11, #32 // =0x20 |
| ; CHECK-EXPAND-NEXT: ubfiz w10, w20, #17, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w12, w12, #29, #1 |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w15 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w14 |
| ; CHECK-EXPAND-NEXT: ubfiz w13, w13, #30, #1 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1, x11] |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w10 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w12 |
| ; CHECK-EXPAND-NEXT: ld1b { z3.b }, p0/z, [x0, x11] |
| ; CHECK-EXPAND-NEXT: orr w8, w7, w8 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w13 |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9 |
| ; CHECK-EXPAND-NEXT: fmov w9, s1 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.b, p0/z, z3.b, z0.b |
| ; CHECK-EXPAND-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-EXPAND-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-EXPAND-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-EXPAND-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: fmov w10, s0 |
| ; CHECK-EXPAND-NEXT: whilelo p3.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: whilelo p4.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p3/z, [x0, x10] |
| ; CHECK-EXPAND-NEXT: ld1b { z1.b }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: expand z1.b, p1, z1.b |
| ; CHECK-EXPAND-NEXT: st1b { z0.b }, p0, [x2, x11] |
| ; CHECK-EXPAND-NEXT: st1b { z1.b }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #96 |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <64 x i8>, ptr %ap |
| %b = load <64 x i8>, ptr %bp |
| %mask = icmp eq <64 x i8> %a, %b |
| %load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison) |
| store <64 x i8> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_v32i16(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_v32i16: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.h |
| ; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w8, v1.b[0] |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[1] |
| ; VBITS_GE_256-NEXT: umov w9, v1.b[7] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[4] |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[8] |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[9] |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[10] |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[2] |
| ; VBITS_GE_256-NEXT: umov w15, v1.b[3] |
| ; VBITS_GE_256-NEXT: umov w1, v1.b[4] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[6] |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v1.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[7] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[12] |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[8] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[14] |
| ; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[9] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[10] |
| ; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w14 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[15] |
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[1] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w15 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[6] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[2] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[14] |
| ; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w18 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w9, w11, w10 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI8_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI8_0 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x9] |
| ; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB8_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rh { z2.h }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: add x0, x0, #2 |
| ; VBITS_GE_256-NEXT: mov z0.d, z2.d |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB8_3 |
| ; VBITS_GE_256-NEXT: b .LBB8_4 |
| ; VBITS_GE_256-NEXT: .LBB8_2: |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB8_4 |
| ; VBITS_GE_256-NEXT: .LBB8_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB8_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB8_36 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB8_37 |
| ; VBITS_GE_256-NEXT: .LBB8_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB8_38 |
| ; VBITS_GE_256-NEXT: .LBB8_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB8_39 |
| ; VBITS_GE_256-NEXT: .LBB8_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB8_40 |
| ; VBITS_GE_256-NEXT: .LBB8_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB8_41 |
| ; VBITS_GE_256-NEXT: .LBB8_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB8_42 |
| ; VBITS_GE_256-NEXT: .LBB8_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB8_43 |
| ; VBITS_GE_256-NEXT: .LBB8_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB8_44 |
| ; VBITS_GE_256-NEXT: .LBB8_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB8_45 |
| ; VBITS_GE_256-NEXT: .LBB8_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB8_46 |
| ; VBITS_GE_256-NEXT: .LBB8_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB8_47 |
| ; VBITS_GE_256-NEXT: .LBB8_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB8_48 |
| ; VBITS_GE_256-NEXT: .LBB8_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB8_49 |
| ; VBITS_GE_256-NEXT: .LBB8_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB8_50 |
| ; VBITS_GE_256-NEXT: .LBB8_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB8_51 |
| ; VBITS_GE_256-NEXT: .LBB8_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB8_52 |
| ; VBITS_GE_256-NEXT: .LBB8_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB8_53 |
| ; VBITS_GE_256-NEXT: .LBB8_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB8_54 |
| ; VBITS_GE_256-NEXT: .LBB8_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB8_55 |
| ; VBITS_GE_256-NEXT: .LBB8_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB8_56 |
| ; VBITS_GE_256-NEXT: .LBB8_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB8_57 |
| ; VBITS_GE_256-NEXT: .LBB8_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB8_58 |
| ; VBITS_GE_256-NEXT: .LBB8_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB8_59 |
| ; VBITS_GE_256-NEXT: .LBB8_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB8_60 |
| ; VBITS_GE_256-NEXT: .LBB8_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB8_61 |
| ; VBITS_GE_256-NEXT: .LBB8_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB8_62 |
| ; VBITS_GE_256-NEXT: .LBB8_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB8_63 |
| ; VBITS_GE_256-NEXT: .LBB8_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB8_64 |
| ; VBITS_GE_256-NEXT: .LBB8_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB8_35 |
| ; VBITS_GE_256-NEXT: .LBB8_34: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w8 |
| ; VBITS_GE_256-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB8_35: // %else122 |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB8_36: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB8_6 |
| ; VBITS_GE_256-NEXT: .LBB8_37: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB8_7 |
| ; VBITS_GE_256-NEXT: .LBB8_38: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB8_8 |
| ; VBITS_GE_256-NEXT: .LBB8_39: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB8_9 |
| ; VBITS_GE_256-NEXT: .LBB8_40: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB8_10 |
| ; VBITS_GE_256-NEXT: .LBB8_41: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB8_11 |
| ; VBITS_GE_256-NEXT: .LBB8_42: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB8_12 |
| ; VBITS_GE_256-NEXT: .LBB8_43: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB8_13 |
| ; VBITS_GE_256-NEXT: .LBB8_44: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB8_14 |
| ; VBITS_GE_256-NEXT: .LBB8_45: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB8_15 |
| ; VBITS_GE_256-NEXT: .LBB8_46: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB8_16 |
| ; VBITS_GE_256-NEXT: .LBB8_47: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB8_17 |
| ; VBITS_GE_256-NEXT: .LBB8_48: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB8_18 |
| ; VBITS_GE_256-NEXT: .LBB8_49: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB8_19 |
| ; VBITS_GE_256-NEXT: .LBB8_50: // %cond.load61 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: ptrue p2.h, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB8_20 |
| ; VBITS_GE_256-NEXT: .LBB8_51: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB8_21 |
| ; VBITS_GE_256-NEXT: .LBB8_52: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB8_22 |
| ; VBITS_GE_256-NEXT: .LBB8_53: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB8_23 |
| ; VBITS_GE_256-NEXT: .LBB8_54: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB8_24 |
| ; VBITS_GE_256-NEXT: .LBB8_55: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB8_25 |
| ; VBITS_GE_256-NEXT: .LBB8_56: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB8_26 |
| ; VBITS_GE_256-NEXT: .LBB8_57: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB8_27 |
| ; VBITS_GE_256-NEXT: .LBB8_58: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB8_28 |
| ; VBITS_GE_256-NEXT: .LBB8_59: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB8_29 |
| ; VBITS_GE_256-NEXT: .LBB8_60: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB8_30 |
| ; VBITS_GE_256-NEXT: .LBB8_61: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB8_31 |
| ; VBITS_GE_256-NEXT: .LBB8_62: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB8_32 |
| ; VBITS_GE_256-NEXT: .LBB8_63: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB8_33 |
| ; VBITS_GE_256-NEXT: .LBB8_64: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z2.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h |
| ; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB8_34 |
| ; VBITS_GE_256-NEXT: b .LBB8_35 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_v32i16: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[1] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: fmov w6, s0 |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: fmov w22, s7 |
| ; VBITS_GE_512-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[11] |
| ; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[12] |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: fmov w23, s16 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w24, s17 |
| ; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1 |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[13] |
| ; VBITS_GE_512-NEXT: orr w12, w12, w3 |
| ; VBITS_GE_512-NEXT: orr w3, w5, w20 |
| ; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w25, s18 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_512-NEXT: orr w13, w3, w4 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w26, s19 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w16, w18, #12, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w18, w24, #22, #1 |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: orr w13, w13, w3 |
| ; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: fmov w27, s20 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w17, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w13, w18 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w25, #23, #1 |
| ; VBITS_GE_512-NEXT: fmov w28, s21 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w16 |
| ; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w9, w26, #24, #1 |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w29, s22 |
| ; VBITS_GE_512-NEXT: orr w10, w11, w10 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: fmov w7, s3 |
| ; VBITS_GE_512-NEXT: fmov w30, s23 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1 |
| ; VBITS_GE_512-NEXT: str w8, [sp, #8] // 4-byte Spill |
| ; VBITS_GE_512-NEXT: fmov w19, s4 |
| ; VBITS_GE_512-NEXT: fmov w8, s24 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w11 |
| ; VBITS_GE_512-NEXT: fmov w11, s2 |
| ; VBITS_GE_512-NEXT: ldr w15, [sp, #8] // 4-byte Reload |
| ; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w15, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w8 |
| ; VBITS_GE_512-NEXT: orr w9, w6, w10 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w8 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB8_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB8_3 |
| ; VBITS_GE_512-NEXT: b .LBB8_4 |
| ; VBITS_GE_512-NEXT: .LBB8_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI8_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI8_0 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB8_4 |
| ; VBITS_GE_512-NEXT: .LBB8_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB8_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB8_36 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB8_37 |
| ; VBITS_GE_512-NEXT: .LBB8_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB8_38 |
| ; VBITS_GE_512-NEXT: .LBB8_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB8_39 |
| ; VBITS_GE_512-NEXT: .LBB8_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB8_40 |
| ; VBITS_GE_512-NEXT: .LBB8_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB8_41 |
| ; VBITS_GE_512-NEXT: .LBB8_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB8_42 |
| ; VBITS_GE_512-NEXT: .LBB8_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB8_43 |
| ; VBITS_GE_512-NEXT: .LBB8_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB8_44 |
| ; VBITS_GE_512-NEXT: .LBB8_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB8_45 |
| ; VBITS_GE_512-NEXT: .LBB8_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB8_46 |
| ; VBITS_GE_512-NEXT: .LBB8_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB8_47 |
| ; VBITS_GE_512-NEXT: .LBB8_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB8_48 |
| ; VBITS_GE_512-NEXT: .LBB8_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB8_49 |
| ; VBITS_GE_512-NEXT: .LBB8_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB8_50 |
| ; VBITS_GE_512-NEXT: .LBB8_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB8_51 |
| ; VBITS_GE_512-NEXT: .LBB8_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB8_52 |
| ; VBITS_GE_512-NEXT: .LBB8_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB8_53 |
| ; VBITS_GE_512-NEXT: .LBB8_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB8_54 |
| ; VBITS_GE_512-NEXT: .LBB8_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB8_55 |
| ; VBITS_GE_512-NEXT: .LBB8_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB8_56 |
| ; VBITS_GE_512-NEXT: .LBB8_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB8_57 |
| ; VBITS_GE_512-NEXT: .LBB8_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB8_58 |
| ; VBITS_GE_512-NEXT: .LBB8_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB8_59 |
| ; VBITS_GE_512-NEXT: .LBB8_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB8_60 |
| ; VBITS_GE_512-NEXT: .LBB8_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB8_61 |
| ; VBITS_GE_512-NEXT: .LBB8_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB8_62 |
| ; VBITS_GE_512-NEXT: .LBB8_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB8_63 |
| ; VBITS_GE_512-NEXT: .LBB8_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB8_64 |
| ; VBITS_GE_512-NEXT: .LBB8_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB8_35 |
| ; VBITS_GE_512-NEXT: .LBB8_34: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w8 |
| ; VBITS_GE_512-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB8_35: // %else122 |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB8_36: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB8_6 |
| ; VBITS_GE_512-NEXT: .LBB8_37: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB8_7 |
| ; VBITS_GE_512-NEXT: .LBB8_38: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB8_8 |
| ; VBITS_GE_512-NEXT: .LBB8_39: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB8_9 |
| ; VBITS_GE_512-NEXT: .LBB8_40: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB8_10 |
| ; VBITS_GE_512-NEXT: .LBB8_41: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB8_11 |
| ; VBITS_GE_512-NEXT: .LBB8_42: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB8_12 |
| ; VBITS_GE_512-NEXT: .LBB8_43: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB8_13 |
| ; VBITS_GE_512-NEXT: .LBB8_44: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB8_14 |
| ; VBITS_GE_512-NEXT: .LBB8_45: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB8_15 |
| ; VBITS_GE_512-NEXT: .LBB8_46: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB8_16 |
| ; VBITS_GE_512-NEXT: .LBB8_47: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB8_17 |
| ; VBITS_GE_512-NEXT: .LBB8_48: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB8_18 |
| ; VBITS_GE_512-NEXT: .LBB8_49: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB8_19 |
| ; VBITS_GE_512-NEXT: .LBB8_50: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB8_20 |
| ; VBITS_GE_512-NEXT: .LBB8_51: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB8_21 |
| ; VBITS_GE_512-NEXT: .LBB8_52: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB8_22 |
| ; VBITS_GE_512-NEXT: .LBB8_53: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB8_23 |
| ; VBITS_GE_512-NEXT: .LBB8_54: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB8_24 |
| ; VBITS_GE_512-NEXT: .LBB8_55: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB8_25 |
| ; VBITS_GE_512-NEXT: .LBB8_56: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB8_26 |
| ; VBITS_GE_512-NEXT: .LBB8_57: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB8_27 |
| ; VBITS_GE_512-NEXT: .LBB8_58: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB8_28 |
| ; VBITS_GE_512-NEXT: .LBB8_59: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB8_29 |
| ; VBITS_GE_512-NEXT: .LBB8_60: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB8_30 |
| ; VBITS_GE_512-NEXT: .LBB8_61: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB8_31 |
| ; VBITS_GE_512-NEXT: .LBB8_62: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB8_32 |
| ; VBITS_GE_512-NEXT: .LBB8_63: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB8_33 |
| ; VBITS_GE_512-NEXT: .LBB8_64: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB8_34 |
| ; VBITS_GE_512-NEXT: b .LBB8_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v32i16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h |
| ; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: umov w8, v0.b[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[1] |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[7] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.b[8] |
| ; CHECK-EXPAND-NEXT: umov w12, v0.b[2] |
| ; CHECK-EXPAND-NEXT: umov w13, v0.b[9] |
| ; CHECK-EXPAND-NEXT: umov w14, v0.b[10] |
| ; CHECK-EXPAND-NEXT: umov w15, v0.b[3] |
| ; CHECK-EXPAND-NEXT: umov w16, v0.b[4] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: ubfiz w10, w10, #7, #1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[11] |
| ; CHECK-EXPAND-NEXT: ubfiz w11, w11, #8, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w13, w13, #9, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w14, w14, #10, #1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w12, #2, #1 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w11 |
| ; CHECK-EXPAND-NEXT: umov w11, v0.b[12] |
| ; CHECK-EXPAND-NEXT: umov w12, v0.b[5] |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w13 |
| ; CHECK-EXPAND-NEXT: umov w13, v0.b[13] |
| ; CHECK-EXPAND-NEXT: bfi w8, w15, #3, #1 |
| ; CHECK-EXPAND-NEXT: umov w15, v0.b[14] |
| ; CHECK-EXPAND-NEXT: ubfiz w9, w9, #11, #1 |
| ; CHECK-EXPAND-NEXT: orr w10, w10, w14 |
| ; CHECK-EXPAND-NEXT: mov x14, #16 // =0x10 |
| ; CHECK-EXPAND-NEXT: bfi w8, w16, #4, #1 |
| ; CHECK-EXPAND-NEXT: umov w16, v0.b[6] |
| ; CHECK-EXPAND-NEXT: orr w9, w10, w9 |
| ; CHECK-EXPAND-NEXT: ubfiz w10, w11, #12, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w11, w13, #13, #1 |
| ; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1, x14, lsl #1] |
| ; CHECK-EXPAND-NEXT: bfi w8, w12, #5, #1 |
| ; CHECK-EXPAND-NEXT: ubfiz w12, w15, #14, #1 |
| ; CHECK-EXPAND-NEXT: ld1h { z2.h }, p0/z, [x0, x14, lsl #1] |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w10 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[15] |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w11 |
| ; CHECK-EXPAND-NEXT: bfi w8, w16, #6, #1 |
| ; CHECK-EXPAND-NEXT: orr w9, w9, w12 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z2.h, z1.h |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9 |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #15 |
| ; CHECK-EXPAND-NEXT: cntp x10, p1, p1.h |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xffff |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: whilelo p4.h, xzr, x10 |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.h, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1h { z1.h }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: expand z1.h, p1, z1.h |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p3/z, [x0, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2, x14, lsl #1] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <32 x i16>, ptr %ap |
| %b = load <32 x i16>, ptr %bp |
| %mask = icmp eq <32 x i16> %a, %b |
| %load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison) |
| store <32 x i16> %load, ptr %c |
| ret void |
| } |
| |
| ; Expand-load test for <16 x i32>. The function name says masked_load, but the |
| ; intrinsic exercised in the body is llvm.masked.expandload. The mask is the |
| ; lane-wise equality of the vectors loaded from %ap and %bp, the expanding load |
| ; reads from %ap with a poison passthrough, and the result is stored to %c. |
| ; |
| ; Three expected lowerings follow, one per configuration in the file header: |
| ; - 256-bit minimum vector length: the result is built in two 8-lane halves |
| ; (z0 for lanes 0-7, z1 for lanes 8-15). Each mask bit in w8 guards one scalar |
| ; load from the advancing pointer x0, which is then inserted into its lane. |
| ; - 512-bit minimum vector length and above: the same per-lane scheme, but on a |
| ; single 16-lane vector (z0). |
| ; - 256-bit minimum with sve2p2: no per-lane branches. Each half is loaded |
| ; contiguously under a whilelo predicate sized by the active-lane count, then |
| ; spread into position with the expand instruction. |
| ; |
| ; The assertions below are produced by utils/update_llc_test_checks.py; regenerate |
| ; them with that script rather than editing them by hand. |
| ; NOTE(review): attribute group #0 is defined outside this excerpt - confirm its contents there. |
| define void @masked_load_v16i32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_v16i32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI9_0 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI9_1 |
| ; VBITS_GE_256-NEXT: add x8, x8, :lo12:.LCPI9_1 |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h1, v0.8h |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x8] |
| ; VBITS_GE_256-NEXT: fmov w9, s1 |
| ; VBITS_GE_256-NEXT: fmov w8, s1 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB9_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z2.s }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: mov z0.d, z2.d |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB9_3 |
| ; VBITS_GE_256-NEXT: b .LBB9_4 |
| ; VBITS_GE_256-NEXT: .LBB9_2: |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB9_4 |
| ; VBITS_GE_256-NEXT: .LBB9_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB9_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB9_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB9_21 |
| ; VBITS_GE_256-NEXT: .LBB9_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB9_22 |
| ; VBITS_GE_256-NEXT: .LBB9_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB9_23 |
| ; VBITS_GE_256-NEXT: .LBB9_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB9_24 |
| ; VBITS_GE_256-NEXT: .LBB9_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB9_25 |
| ; VBITS_GE_256-NEXT: .LBB9_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB9_26 |
| ; VBITS_GE_256-NEXT: .LBB9_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB9_27 |
| ; VBITS_GE_256-NEXT: .LBB9_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB9_28 |
| ; VBITS_GE_256-NEXT: .LBB9_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB9_29 |
| ; VBITS_GE_256-NEXT: .LBB9_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB9_30 |
| ; VBITS_GE_256-NEXT: .LBB9_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB9_31 |
| ; VBITS_GE_256-NEXT: .LBB9_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB9_32 |
| ; VBITS_GE_256-NEXT: .LBB9_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB9_19 |
| ; VBITS_GE_256-NEXT: .LBB9_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB9_19: // %else58 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB9_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB9_6 |
| ; VBITS_GE_256-NEXT: .LBB9_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB9_7 |
| ; VBITS_GE_256-NEXT: .LBB9_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB9_8 |
| ; VBITS_GE_256-NEXT: .LBB9_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB9_9 |
| ; VBITS_GE_256-NEXT: .LBB9_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB9_10 |
| ; VBITS_GE_256-NEXT: .LBB9_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB9_11 |
| ; VBITS_GE_256-NEXT: .LBB9_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: ptrue p2.s, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB9_12 |
| ; VBITS_GE_256-NEXT: .LBB9_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB9_13 |
| ; VBITS_GE_256-NEXT: .LBB9_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB9_14 |
| ; VBITS_GE_256-NEXT: .LBB9_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB9_15 |
| ; VBITS_GE_256-NEXT: .LBB9_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB9_16 |
| ; VBITS_GE_256-NEXT: .LBB9_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB9_17 |
| ; VBITS_GE_256-NEXT: .LBB9_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z2.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB9_18 |
| ; VBITS_GE_256-NEXT: b .LBB9_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_v16i32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB9_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB9_3 |
| ; VBITS_GE_512-NEXT: b .LBB9_4 |
| ; VBITS_GE_512-NEXT: .LBB9_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI9_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI9_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB9_4 |
| ; VBITS_GE_512-NEXT: .LBB9_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB9_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB9_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB9_21 |
| ; VBITS_GE_512-NEXT: .LBB9_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB9_22 |
| ; VBITS_GE_512-NEXT: .LBB9_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB9_23 |
| ; VBITS_GE_512-NEXT: .LBB9_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB9_24 |
| ; VBITS_GE_512-NEXT: .LBB9_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB9_25 |
| ; VBITS_GE_512-NEXT: .LBB9_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB9_26 |
| ; VBITS_GE_512-NEXT: .LBB9_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB9_27 |
| ; VBITS_GE_512-NEXT: .LBB9_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB9_28 |
| ; VBITS_GE_512-NEXT: .LBB9_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB9_29 |
| ; VBITS_GE_512-NEXT: .LBB9_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB9_30 |
| ; VBITS_GE_512-NEXT: .LBB9_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB9_31 |
| ; VBITS_GE_512-NEXT: .LBB9_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB9_32 |
| ; VBITS_GE_512-NEXT: .LBB9_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB9_19 |
| ; VBITS_GE_512-NEXT: .LBB9_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB9_19: // %else58 |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB9_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB9_6 |
| ; VBITS_GE_512-NEXT: .LBB9_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB9_7 |
| ; VBITS_GE_512-NEXT: .LBB9_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB9_8 |
| ; VBITS_GE_512-NEXT: .LBB9_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB9_9 |
| ; VBITS_GE_512-NEXT: .LBB9_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB9_10 |
| ; VBITS_GE_512-NEXT: .LBB9_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB9_11 |
| ; VBITS_GE_512-NEXT: .LBB9_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB9_12 |
| ; VBITS_GE_512-NEXT: .LBB9_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB9_13 |
| ; VBITS_GE_512-NEXT: .LBB9_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB9_14 |
| ; VBITS_GE_512-NEXT: .LBB9_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB9_15 |
| ; VBITS_GE_512-NEXT: .LBB9_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB9_16 |
| ; VBITS_GE_512-NEXT: .LBB9_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB9_17 |
| ; VBITS_GE_512-NEXT: .LBB9_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB9_18 |
| ; VBITS_GE_512-NEXT: b .LBB9_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v16i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: umov w8, v0.b[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[1] |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[2] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.b[3] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[4] |
| ; CHECK-EXPAND-NEXT: bfi w8, w10, #2, #1 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[5] |
| ; CHECK-EXPAND-NEXT: bfi w8, w11, #3, #1 |
| ; CHECK-EXPAND-NEXT: mov x11, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #4, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.b[6] |
| ; CHECK-EXPAND-NEXT: bfi w8, w10, #5, #1 |
| ; CHECK-EXPAND-NEXT: umov w10, v0.b[7] |
| ; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z2.s, z1.s |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #6, #1 |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #7 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s |
| ; CHECK-EXPAND-NEXT: cntp x10, p1, p1.s |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xff |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: whilelo p4.s, xzr, x10 |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.s, xzr, x9 |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p3/z, [x0, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: expand z1.s, p1, z1.s |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2, x11, lsl #2] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| ; Build the mask: lane i is enabled when the i-th elements at %ap and %bp are equal. |
| %a = load <16 x i32>, ptr %ap |
| %b = load <16 x i32>, ptr %bp |
| %mask = icmp eq <16 x i32> %a, %b |
| ; Expanding load from %ap under that mask; the passthrough operand is poison. |
| %load = call <16 x i32> @llvm.masked.expandload.v16i32(ptr %ap, <16 x i1> %mask, <16 x i32> poison) |
| ; Write the full 16-lane result to %c. |
| store <16 x i32> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_v8i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_v8i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI10_0 |
| ; VBITS_GE_256-NEXT: add x8, x8, :lo12:.LCPI10_0 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x8] |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB10_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rd { z2.d }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: add x0, x0, #8 |
| ; VBITS_GE_256-NEXT: mov z0.d, z2.d |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB10_3 |
| ; VBITS_GE_256-NEXT: b .LBB10_4 |
| ; VBITS_GE_256-NEXT: .LBB10_2: |
| ; VBITS_GE_256-NEXT: mov z1.d, z0.d |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB10_4 |
| ; VBITS_GE_256-NEXT: .LBB10_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: .LBB10_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB10_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB10_13 |
| ; VBITS_GE_256-NEXT: .LBB10_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB10_14 |
| ; VBITS_GE_256-NEXT: .LBB10_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB10_15 |
| ; VBITS_GE_256-NEXT: .LBB10_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB10_16 |
| ; VBITS_GE_256-NEXT: .LBB10_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB10_11 |
| ; VBITS_GE_256-NEXT: .LBB10_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x8 |
| ; VBITS_GE_256-NEXT: ldr x8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x8 |
| ; VBITS_GE_256-NEXT: .LBB10_11: // %else26 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB10_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB10_6 |
| ; VBITS_GE_256-NEXT: .LBB10_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB10_7 |
| ; VBITS_GE_256-NEXT: .LBB10_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: ptrue p2.d, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB10_8 |
| ; VBITS_GE_256-NEXT: .LBB10_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB10_9 |
| ; VBITS_GE_256-NEXT: .LBB10_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB10_10 |
| ; VBITS_GE_256-NEXT: b .LBB10_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_v8i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB10_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rd { z0.d }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #8 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB10_3 |
| ; VBITS_GE_512-NEXT: b .LBB10_4 |
| ; VBITS_GE_512-NEXT: .LBB10_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI10_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI10_0 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB10_4 |
| ; VBITS_GE_512-NEXT: .LBB10_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: .LBB10_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB10_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB10_13 |
| ; VBITS_GE_512-NEXT: .LBB10_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB10_14 |
| ; VBITS_GE_512-NEXT: .LBB10_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB10_15 |
| ; VBITS_GE_512-NEXT: .LBB10_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB10_16 |
| ; VBITS_GE_512-NEXT: .LBB10_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB10_11 |
| ; VBITS_GE_512-NEXT: .LBB10_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x8 |
| ; VBITS_GE_512-NEXT: ldr x8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x8 |
| ; VBITS_GE_512-NEXT: .LBB10_11: // %else26 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB10_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB10_6 |
| ; VBITS_GE_512-NEXT: .LBB10_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB10_7 |
| ; VBITS_GE_512-NEXT: .LBB10_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB10_8 |
| ; VBITS_GE_512-NEXT: .LBB10_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB10_9 |
| ; VBITS_GE_512-NEXT: .LBB10_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB10_10 |
| ; VBITS_GE_512-NEXT: b .LBB10_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_v8i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z2.d, z1.d |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: umov w8, v0.h[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[1] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.h[2] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[3] |
| ; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1 |
| ; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xf |
| ; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11 |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9 |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: expand z1.d, p1, z1.d |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <8 x i64>, ptr %ap |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %a, %b |
| %load = call <8 x i64> @llvm.masked.expandload.v8i64(ptr %ap, <8 x i1> %mask, <8 x i64> poison) |
| store <8 x i64> %load, ptr %c |
| ret void |
| } |
| |
| ; Expanding masked load of <8 x i64> with a live passthru operand. |
| ; The mask is the lane-wise `icmp eq` of the vectors loaded from %ap and %bp, |
| ; the expanded data is read from %ap, the vector loaded from %bp is the |
| ; passthru, and the result is stored to %c. |
| ; |
| ; The assertion lines are autogenerated by utils/update_llc_test_checks.py |
| ; (see the NOTE at the top of this file); regenerate them instead of editing |
| ; them by hand. Three outputs are recorded: |
| ; * 'VBITS_GE_256' prefix: the vector is handled as two 4-lane halves (z0 is |
| ; stored at [x2], z1 at [x2, #4 elements]). The mask is packed into the low |
| ; 8 bits of w8 and each set bit branches to a block that does a |
| ; post-incremented scalar `ldr` from x0 and inserts it into one lane. |
| ; * 'VBITS_GE_512' prefix: same branch-per-lane scheme, but on a single |
| ; vl8 vector held in z0. |
| ; * 'CHECK-EXPAND' prefix (the sve2p2 RUN line): branch-free sequence using |
| ; cntp + whilelo + ld1d + expand, merged with the passthru halves via a |
| ; predicated mov / sel. |
| define void @masked_load_passthru_v8i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_passthru_v8i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z2.d, z1.d |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z3.d, z0.d |
| ; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s |
| ; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s |
| ; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s |
| ; VBITS_GE_256-NEXT: ptrue p1.d, vl1 |
| ; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h |
| ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w8, v2.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbnz w9, #0, .LBB11_10 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %else |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB11_11 |
| ; VBITS_GE_256-NEXT: .LBB11_2: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB11_12 |
| ; VBITS_GE_256-NEXT: .LBB11_3: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB11_13 |
| ; VBITS_GE_256-NEXT: .LBB11_4: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB11_14 |
| ; VBITS_GE_256-NEXT: .LBB11_5: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB11_15 |
| ; VBITS_GE_256-NEXT: .LBB11_6: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB11_16 |
| ; VBITS_GE_256-NEXT: .LBB11_7: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB11_9 |
| ; VBITS_GE_256-NEXT: .LBB11_8: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x8 |
| ; VBITS_GE_256-NEXT: ldr x8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x8 |
| ; VBITS_GE_256-NEXT: .LBB11_9: // %else26 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB11_10: // %cond.load |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB11_2 |
| ; VBITS_GE_256-NEXT: .LBB11_11: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p2.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #2, .LBB11_3 |
| ; VBITS_GE_256-NEXT: .LBB11_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p2.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB11_4 |
| ; VBITS_GE_256-NEXT: .LBB11_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p2.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB11_5 |
| ; VBITS_GE_256-NEXT: .LBB11_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z1.d, p1/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB11_6 |
| ; VBITS_GE_256-NEXT: .LBB11_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB11_7 |
| ; VBITS_GE_256-NEXT: .LBB11_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB11_8 |
| ; VBITS_GE_256-NEXT: b .LBB11_9 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_passthru_v8i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z1.d, z0.d |
| ; VBITS_GE_512-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_512-NEXT: umov w8, v1.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbnz w9, #0, .LBB11_10 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %else |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB11_11 |
| ; VBITS_GE_512-NEXT: .LBB11_2: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB11_12 |
| ; VBITS_GE_512-NEXT: .LBB11_3: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB11_13 |
| ; VBITS_GE_512-NEXT: .LBB11_4: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB11_14 |
| ; VBITS_GE_512-NEXT: .LBB11_5: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB11_15 |
| ; VBITS_GE_512-NEXT: .LBB11_6: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB11_16 |
| ; VBITS_GE_512-NEXT: .LBB11_7: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB11_9 |
| ; VBITS_GE_512-NEXT: .LBB11_8: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x8 |
| ; VBITS_GE_512-NEXT: ldr x8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x8 |
| ; VBITS_GE_512-NEXT: .LBB11_9: // %else26 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB11_10: // %cond.load |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: ptrue p1.d, vl1 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB11_2 |
| ; VBITS_GE_512-NEXT: .LBB11_11: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #2, .LBB11_3 |
| ; VBITS_GE_512-NEXT: .LBB11_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB11_4 |
| ; VBITS_GE_512-NEXT: .LBB11_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB11_5 |
| ; VBITS_GE_512-NEXT: .LBB11_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB11_6 |
| ; VBITS_GE_512-NEXT: .LBB11_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB11_7 |
| ; VBITS_GE_512-NEXT: .LBB11_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: ldr x9, [x0], #8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB11_8 |
| ; VBITS_GE_512-NEXT: b .LBB11_9 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_passthru_v8i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x1, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z3.d, z2.d |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: umov w8, v0.h[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[1] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.h[2] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[3] |
| ; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1 |
| ; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xf |
| ; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11 |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: ld1d { z3.d }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9 |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: expand z3.d, p1, z3.d |
| ; CHECK-EXPAND-NEXT: mov z1.d, p1/m, z3.d |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d |
| ; CHECK-EXPAND-NEXT: sel z0.d, p2, z0.d, z2.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| ; %b is used twice: as the right-hand side of the mask compare and as the |
| ; passthru operand of the expandload. |
| %a = load <8 x i64>, ptr %ap |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %a, %b |
| %load = call <8 x i64> @llvm.masked.expandload.v8i64(ptr %ap, <8 x i1> %mask, <8 x i64> %b) |
| store <8 x i64> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_passthru_v8f64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_passthru_v8f64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z2.d, z1.d |
| ; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z3.d, z0.d |
| ; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s |
| ; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s |
| ; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s |
| ; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h |
| ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w8, v2.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v2.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v2.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbnz w9, #0, .LBB12_10 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %else |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB12_11 |
| ; VBITS_GE_256-NEXT: .LBB12_2: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB12_12 |
| ; VBITS_GE_256-NEXT: .LBB12_3: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB12_13 |
| ; VBITS_GE_256-NEXT: .LBB12_4: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB12_14 |
| ; VBITS_GE_256-NEXT: .LBB12_5: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB12_15 |
| ; VBITS_GE_256-NEXT: .LBB12_6: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB12_16 |
| ; VBITS_GE_256-NEXT: .LBB12_7: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB12_9 |
| ; VBITS_GE_256-NEXT: .LBB12_8: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x8 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0] |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: .LBB12_9: // %else26 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB12_10: // %cond.load |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: ptrue p1.d, vl1 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/m, z2.d |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB12_2 |
| ; VBITS_GE_256-NEXT: .LBB12_11: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: tbz w8, #2, .LBB12_3 |
| ; VBITS_GE_256-NEXT: .LBB12_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB12_4 |
| ; VBITS_GE_256-NEXT: .LBB12_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB12_5 |
| ; VBITS_GE_256-NEXT: .LBB12_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: ptrue p1.d, vl1 |
| ; VBITS_GE_256-NEXT: mov z1.d, p1/m, z2.d |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB12_6 |
| ; VBITS_GE_256-NEXT: .LBB12_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB12_7 |
| ; VBITS_GE_256-NEXT: .LBB12_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z2.d, #0, #1 |
| ; VBITS_GE_256-NEXT: ptrue p1.d |
| ; VBITS_GE_256-NEXT: mov z3.d, x9 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d |
| ; VBITS_GE_256-NEXT: ldr d2, [x0], #8 |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB12_8 |
| ; VBITS_GE_256-NEXT: b .LBB12_9 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_passthru_v8f64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z1.d, z0.d |
| ; VBITS_GE_512-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_512-NEXT: umov w8, v1.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v1.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v1.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbnz w9, #0, .LBB12_10 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %else |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB12_11 |
| ; VBITS_GE_512-NEXT: .LBB12_2: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB12_12 |
| ; VBITS_GE_512-NEXT: .LBB12_3: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB12_13 |
| ; VBITS_GE_512-NEXT: .LBB12_4: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB12_14 |
| ; VBITS_GE_512-NEXT: .LBB12_5: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB12_15 |
| ; VBITS_GE_512-NEXT: .LBB12_6: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB12_16 |
| ; VBITS_GE_512-NEXT: .LBB12_7: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB12_9 |
| ; VBITS_GE_512-NEXT: .LBB12_8: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x8 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0] |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: .LBB12_9: // %else26 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB12_10: // %cond.load |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: ptrue p1.d, vl1 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/m, z1.d |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB12_2 |
| ; VBITS_GE_512-NEXT: .LBB12_11: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbz w8, #2, .LBB12_3 |
| ; VBITS_GE_512-NEXT: .LBB12_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB12_4 |
| ; VBITS_GE_512-NEXT: .LBB12_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB12_5 |
| ; VBITS_GE_512-NEXT: .LBB12_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB12_6 |
| ; VBITS_GE_512-NEXT: .LBB12_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB12_7 |
| ; VBITS_GE_512-NEXT: .LBB12_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.d, #0, #1 |
| ; VBITS_GE_512-NEXT: ptrue p1.d |
| ; VBITS_GE_512-NEXT: mov z2.d, x9 |
| ; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d |
| ; VBITS_GE_512-NEXT: ldr d1, [x0], #8 |
| ; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB12_8 |
| ; VBITS_GE_512-NEXT: b .LBB12_9 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_passthru_v8f64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: sub sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ptrue p3.s |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x1, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d |
| ; CHECK-EXPAND-NEXT: fcmeq p2.d, p0/z, z3.d, z2.d |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: umov w8, v0.h[0] |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[1] |
| ; CHECK-EXPAND-NEXT: umov w11, v0.h[2] |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0x1 |
| ; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-EXPAND-NEXT: umov w9, v0.h[3] |
| ; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1 |
| ; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d |
| ; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d |
| ; CHECK-EXPAND-NEXT: and w8, w8, #0xf |
| ; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11 |
| ; CHECK-EXPAND-NEXT: fmov s0, w8 |
| ; CHECK-EXPAND-NEXT: ld1d { z3.d }, p4/z, [x0] |
| ; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s |
| ; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9 |
| ; CHECK-EXPAND-NEXT: fmov w8, s0 |
| ; CHECK-EXPAND-NEXT: expand z3.d, p1, z3.d |
| ; CHECK-EXPAND-NEXT: mov z1.d, p1/m, z3.d |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d |
| ; CHECK-EXPAND-NEXT: sel z0.d, p2, z0.d, z2.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3] |
| ; CHECK-EXPAND-NEXT: add sp, sp, #16 |
| ; CHECK-EXPAND-NEXT: ret |
| %a = load <8 x double>, ptr %ap |
| %b = load <8 x double>, ptr %bp |
| %mask = fcmp oeq <8 x double> %a, %b |
| %load = call <8 x double> @llvm.masked.expandload.v8f64(ptr %ap, <8 x i1> %mask, <8 x double> %b) |
| store <8 x double> %load, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_256-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_256-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_256-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_256-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_256-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_256-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_256-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_256-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_256-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_256-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_256-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_256-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_256-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_256-NEXT: ptrue p1.b, vl32 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; VBITS_GE_256-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.b |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[1] |
| ; VBITS_GE_256-NEXT: fmov w6, s0 |
| ; VBITS_GE_256-NEXT: umov w4, v0.b[7] |
| ; VBITS_GE_256-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[2] |
| ; VBITS_GE_256-NEXT: umov w3, v0.b[9] |
| ; VBITS_GE_256-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_256-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_256-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_256-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_256-NEXT: bfi w6, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[11] |
| ; VBITS_GE_256-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_256-NEXT: ubfiz w13, w4, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w5, #8, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_256-NEXT: fmov w20, s5 |
| ; VBITS_GE_256-NEXT: fmov w21, s6 |
| ; VBITS_GE_256-NEXT: bfi w6, w12, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w16, v0.b[12] |
| ; VBITS_GE_256-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_256-NEXT: fmov w22, s7 |
| ; VBITS_GE_256-NEXT: orr w12, w13, w4 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w3, #9, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[13] |
| ; VBITS_GE_256-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_256-NEXT: fmov w23, s16 |
| ; VBITS_GE_256-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w1, #10, #1 |
| ; VBITS_GE_256-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_256-NEXT: fmov w24, s17 |
| ; VBITS_GE_256-NEXT: ubfiz w3, w20, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w21, #19, #1 |
| ; VBITS_GE_256-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w18, #11, #1 |
| ; VBITS_GE_256-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_256-NEXT: fmov w25, s18 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w22, #20, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w12, w11 |
| ; VBITS_GE_256-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_256-NEXT: fmov w26, s19 |
| ; VBITS_GE_256-NEXT: orr w3, w3, w4 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w16, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w23, #21, #1 |
| ; VBITS_GE_256-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_256-NEXT: fmov w27, s20 |
| ; VBITS_GE_256-NEXT: orr w10, w3, w1 |
| ; VBITS_GE_256-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w9, w17, #13, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w24, #22, #1 |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_256-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_256-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_256-NEXT: fmov w5, s21 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w25, #23, #1 |
| ; VBITS_GE_256-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_256-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_256-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w26, #24, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_256-NEXT: fmov w28, s22 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_256-NEXT: fmov w7, s3 |
| ; VBITS_GE_256-NEXT: fmov w29, s23 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w5, #26, #1 |
| ; VBITS_GE_256-NEXT: fmov w19, s4 |
| ; VBITS_GE_256-NEXT: fmov w30, s24 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w15, #15, #1 |
| ; VBITS_GE_256-NEXT: bfi w6, w8, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w8, w9, w12 |
| ; VBITS_GE_256-NEXT: orr w9, w10, w13 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w28, #27, #1 |
| ; VBITS_GE_256-NEXT: fmov w14, s2 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w29, #28, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w19, #17, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w30, #29, #1 |
| ; VBITS_GE_256-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w14, #30, #1 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w12 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: orr w8, w6, w8 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w11 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: fmov w9, s1 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB13_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB13_3 |
| ; VBITS_GE_256-NEXT: b .LBB13_4 |
| ; VBITS_GE_256-NEXT: .LBB13_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI13_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI13_0 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB13_4 |
| ; VBITS_GE_256-NEXT: .LBB13_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB13_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB13_36 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB13_37 |
| ; VBITS_GE_256-NEXT: .LBB13_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB13_38 |
| ; VBITS_GE_256-NEXT: .LBB13_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB13_39 |
| ; VBITS_GE_256-NEXT: .LBB13_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB13_40 |
| ; VBITS_GE_256-NEXT: .LBB13_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB13_41 |
| ; VBITS_GE_256-NEXT: .LBB13_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB13_42 |
| ; VBITS_GE_256-NEXT: .LBB13_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB13_43 |
| ; VBITS_GE_256-NEXT: .LBB13_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB13_44 |
| ; VBITS_GE_256-NEXT: .LBB13_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB13_45 |
| ; VBITS_GE_256-NEXT: .LBB13_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB13_46 |
| ; VBITS_GE_256-NEXT: .LBB13_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB13_47 |
| ; VBITS_GE_256-NEXT: .LBB13_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB13_48 |
| ; VBITS_GE_256-NEXT: .LBB13_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB13_49 |
| ; VBITS_GE_256-NEXT: .LBB13_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB13_50 |
| ; VBITS_GE_256-NEXT: .LBB13_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB13_51 |
| ; VBITS_GE_256-NEXT: .LBB13_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB13_52 |
| ; VBITS_GE_256-NEXT: .LBB13_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB13_53 |
| ; VBITS_GE_256-NEXT: .LBB13_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB13_54 |
| ; VBITS_GE_256-NEXT: .LBB13_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB13_55 |
| ; VBITS_GE_256-NEXT: .LBB13_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB13_56 |
| ; VBITS_GE_256-NEXT: .LBB13_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB13_57 |
| ; VBITS_GE_256-NEXT: .LBB13_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB13_58 |
| ; VBITS_GE_256-NEXT: .LBB13_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB13_59 |
| ; VBITS_GE_256-NEXT: .LBB13_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB13_60 |
| ; VBITS_GE_256-NEXT: .LBB13_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB13_61 |
| ; VBITS_GE_256-NEXT: .LBB13_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB13_62 |
| ; VBITS_GE_256-NEXT: .LBB13_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB13_63 |
| ; VBITS_GE_256-NEXT: .LBB13_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB13_64 |
| ; VBITS_GE_256-NEXT: .LBB13_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB13_35 |
| ; VBITS_GE_256-NEXT: .LBB13_34: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w8 |
| ; VBITS_GE_256-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB13_35: // %else122 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| ; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: add sp, sp, #112 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB13_36: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB13_6 |
| ; VBITS_GE_256-NEXT: .LBB13_37: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB13_7 |
| ; VBITS_GE_256-NEXT: .LBB13_38: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB13_8 |
| ; VBITS_GE_256-NEXT: .LBB13_39: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB13_9 |
| ; VBITS_GE_256-NEXT: .LBB13_40: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB13_10 |
| ; VBITS_GE_256-NEXT: .LBB13_41: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB13_11 |
| ; VBITS_GE_256-NEXT: .LBB13_42: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB13_12 |
| ; VBITS_GE_256-NEXT: .LBB13_43: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB13_13 |
| ; VBITS_GE_256-NEXT: .LBB13_44: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB13_14 |
| ; VBITS_GE_256-NEXT: .LBB13_45: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB13_15 |
| ; VBITS_GE_256-NEXT: .LBB13_46: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB13_16 |
| ; VBITS_GE_256-NEXT: .LBB13_47: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB13_17 |
| ; VBITS_GE_256-NEXT: .LBB13_48: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB13_18 |
| ; VBITS_GE_256-NEXT: .LBB13_49: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB13_19 |
| ; VBITS_GE_256-NEXT: .LBB13_50: // %cond.load61 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB13_20 |
| ; VBITS_GE_256-NEXT: .LBB13_51: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB13_21 |
| ; VBITS_GE_256-NEXT: .LBB13_52: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB13_22 |
| ; VBITS_GE_256-NEXT: .LBB13_53: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB13_23 |
| ; VBITS_GE_256-NEXT: .LBB13_54: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB13_24 |
| ; VBITS_GE_256-NEXT: .LBB13_55: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB13_25 |
| ; VBITS_GE_256-NEXT: .LBB13_56: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB13_26 |
| ; VBITS_GE_256-NEXT: .LBB13_57: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB13_27 |
| ; VBITS_GE_256-NEXT: .LBB13_58: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB13_28 |
| ; VBITS_GE_256-NEXT: .LBB13_59: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB13_29 |
| ; VBITS_GE_256-NEXT: .LBB13_60: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB13_30 |
| ; VBITS_GE_256-NEXT: .LBB13_61: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB13_31 |
| ; VBITS_GE_256-NEXT: .LBB13_62: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB13_32 |
| ; VBITS_GE_256-NEXT: .LBB13_63: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB13_33 |
| ; VBITS_GE_256-NEXT: .LBB13_64: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB13_34 |
| ; VBITS_GE_256-NEXT: b .LBB13_35 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v32i8i16: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p1.b, vl32 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; VBITS_GE_512-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.b |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[1] |
| ; VBITS_GE_512-NEXT: fmov w6, s0 |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_512-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: bfi w6, w13, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[11] |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: ubfiz w13, w4, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w5, #8, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: bfi w6, w12, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[12] |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: fmov w22, s7 |
| ; VBITS_GE_512-NEXT: orr w12, w13, w4 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w3, #9, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[13] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w23, s16 |
| ; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w1, #10, #1 |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w24, s17 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w20, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w21, #19, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w18, #11, #1 |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w25, s18 |
| ; VBITS_GE_512-NEXT: ubfiz w1, w22, #20, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w11 |
| ; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: fmov w26, s19 |
| ; VBITS_GE_512-NEXT: orr w3, w3, w4 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w16, #12, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w23, #21, #1 |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: fmov w27, s20 |
| ; VBITS_GE_512-NEXT: orr w10, w3, w1 |
| ; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w9, w17, #13, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w16, w24, #22, #1 |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w5, s21 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w25, #23, #1 |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w16 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w26, #24, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: fmov w28, s22 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_512-NEXT: fmov w7, s3 |
| ; VBITS_GE_512-NEXT: fmov w29, s23 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w5, #26, #1 |
| ; VBITS_GE_512-NEXT: fmov w19, s4 |
| ; VBITS_GE_512-NEXT: fmov w30, s24 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w8, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w9, w10, w13 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w28, #27, #1 |
| ; VBITS_GE_512-NEXT: fmov w14, s2 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w7, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w29, #28, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w19, #17, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w30, #29, #1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w14, #30, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w12 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w8, w6, w8 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w11 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB13_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB13_3 |
| ; VBITS_GE_512-NEXT: b .LBB13_4 |
| ; VBITS_GE_512-NEXT: .LBB13_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI13_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI13_0 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB13_4 |
| ; VBITS_GE_512-NEXT: .LBB13_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB13_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB13_36 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB13_37 |
| ; VBITS_GE_512-NEXT: .LBB13_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB13_38 |
| ; VBITS_GE_512-NEXT: .LBB13_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB13_39 |
| ; VBITS_GE_512-NEXT: .LBB13_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB13_40 |
| ; VBITS_GE_512-NEXT: .LBB13_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB13_41 |
| ; VBITS_GE_512-NEXT: .LBB13_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB13_42 |
| ; VBITS_GE_512-NEXT: .LBB13_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB13_43 |
| ; VBITS_GE_512-NEXT: .LBB13_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB13_44 |
| ; VBITS_GE_512-NEXT: .LBB13_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB13_45 |
| ; VBITS_GE_512-NEXT: .LBB13_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB13_46 |
| ; VBITS_GE_512-NEXT: .LBB13_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB13_47 |
| ; VBITS_GE_512-NEXT: .LBB13_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB13_48 |
| ; VBITS_GE_512-NEXT: .LBB13_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB13_49 |
| ; VBITS_GE_512-NEXT: .LBB13_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB13_50 |
| ; VBITS_GE_512-NEXT: .LBB13_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB13_51 |
| ; VBITS_GE_512-NEXT: .LBB13_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB13_52 |
| ; VBITS_GE_512-NEXT: .LBB13_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB13_53 |
| ; VBITS_GE_512-NEXT: .LBB13_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB13_54 |
| ; VBITS_GE_512-NEXT: .LBB13_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB13_55 |
| ; VBITS_GE_512-NEXT: .LBB13_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB13_56 |
| ; VBITS_GE_512-NEXT: .LBB13_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB13_57 |
| ; VBITS_GE_512-NEXT: .LBB13_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB13_58 |
| ; VBITS_GE_512-NEXT: .LBB13_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB13_59 |
| ; VBITS_GE_512-NEXT: .LBB13_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB13_60 |
| ; VBITS_GE_512-NEXT: .LBB13_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB13_61 |
| ; VBITS_GE_512-NEXT: .LBB13_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB13_62 |
| ; VBITS_GE_512-NEXT: .LBB13_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB13_63 |
| ; VBITS_GE_512-NEXT: .LBB13_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB13_64 |
| ; VBITS_GE_512-NEXT: .LBB13_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB13_35 |
| ; VBITS_GE_512-NEXT: .LBB13_34: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w8 |
| ; VBITS_GE_512-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB13_35: // %else122 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB13_36: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB13_6 |
| ; VBITS_GE_512-NEXT: .LBB13_37: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB13_7 |
| ; VBITS_GE_512-NEXT: .LBB13_38: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB13_8 |
| ; VBITS_GE_512-NEXT: .LBB13_39: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB13_9 |
| ; VBITS_GE_512-NEXT: .LBB13_40: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB13_10 |
| ; VBITS_GE_512-NEXT: .LBB13_41: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB13_11 |
| ; VBITS_GE_512-NEXT: .LBB13_42: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB13_12 |
| ; VBITS_GE_512-NEXT: .LBB13_43: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB13_13 |
| ; VBITS_GE_512-NEXT: .LBB13_44: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB13_14 |
| ; VBITS_GE_512-NEXT: .LBB13_45: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB13_15 |
| ; VBITS_GE_512-NEXT: .LBB13_46: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB13_16 |
| ; VBITS_GE_512-NEXT: .LBB13_47: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB13_17 |
| ; VBITS_GE_512-NEXT: .LBB13_48: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB13_18 |
| ; VBITS_GE_512-NEXT: .LBB13_49: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB13_19 |
| ; VBITS_GE_512-NEXT: .LBB13_50: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB13_20 |
| ; VBITS_GE_512-NEXT: .LBB13_51: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB13_21 |
| ; VBITS_GE_512-NEXT: .LBB13_52: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB13_22 |
| ; VBITS_GE_512-NEXT: .LBB13_53: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB13_23 |
| ; VBITS_GE_512-NEXT: .LBB13_54: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB13_24 |
| ; VBITS_GE_512-NEXT: .LBB13_55: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB13_25 |
| ; VBITS_GE_512-NEXT: .LBB13_56: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB13_26 |
| ; VBITS_GE_512-NEXT: .LBB13_57: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB13_27 |
| ; VBITS_GE_512-NEXT: .LBB13_58: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB13_28 |
| ; VBITS_GE_512-NEXT: .LBB13_59: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB13_29 |
| ; VBITS_GE_512-NEXT: .LBB13_60: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB13_30 |
| ; VBITS_GE_512-NEXT: .LBB13_61: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB13_31 |
| ; VBITS_GE_512-NEXT: .LBB13_62: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB13_32 |
| ; VBITS_GE_512-NEXT: .LBB13_63: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB13_33 |
| ; VBITS_GE_512-NEXT: .LBB13_64: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB13_34 |
| ; VBITS_GE_512-NEXT: b .LBB13_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl32 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i8>, ptr %bp |
| %mask = icmp eq <32 x i8> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = sext <32 x i8> %load to <32 x i16> |
| store <32 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <16 x i8> followed by sign-extension to <16 x i32>. |
| ; The lane mask is (bytes loaded from %bp) == 0; @llvm.masked.expandload fills |
| ; the active lanes from %ap with a poison passthru, and the widened result is |
| ; stored to %c. Note: despite the "masked_load" name, the intrinsic exercised |
| ; here is expandload. |
| ; |
| ; Expected codegen, per RUN line: |
| ;  * 256-bit run: the mask is reduced to a scalar bitmask in w8 and each lane |
| ;    is loaded behind its own tbz/tbnz test with a post-incremented pointer; |
| ;    the result is split into two halves and written with two 8-lane st1w. |
| ;  * 512-bit and 2048-bit runs: same per-lane branch sequence, but the whole |
| ;    result is written with a single 16-lane st1w. |
| ;  * sve2p2 run: branch-free; cntp + whilelo build a predicate for one |
| ;    contiguous ld1b, and an expand instruction is applied under the compare |
| ;    predicate before widening. |
| define void @masked_load_sext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: ldr q0, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI14_0 |
| ; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] |
| ; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB14_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB14_3 |
| ; VBITS_GE_256-NEXT: b .LBB14_4 |
| ; VBITS_GE_256-NEXT: .LBB14_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB14_4 |
| ; VBITS_GE_256-NEXT: .LBB14_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB14_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB14_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB14_21 |
| ; VBITS_GE_256-NEXT: .LBB14_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB14_22 |
| ; VBITS_GE_256-NEXT: .LBB14_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB14_23 |
| ; VBITS_GE_256-NEXT: .LBB14_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB14_24 |
| ; VBITS_GE_256-NEXT: .LBB14_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB14_25 |
| ; VBITS_GE_256-NEXT: .LBB14_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB14_26 |
| ; VBITS_GE_256-NEXT: .LBB14_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB14_27 |
| ; VBITS_GE_256-NEXT: .LBB14_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB14_28 |
| ; VBITS_GE_256-NEXT: .LBB14_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB14_29 |
| ; VBITS_GE_256-NEXT: .LBB14_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB14_30 |
| ; VBITS_GE_256-NEXT: .LBB14_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB14_31 |
| ; VBITS_GE_256-NEXT: .LBB14_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB14_32 |
| ; VBITS_GE_256-NEXT: .LBB14_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB14_19 |
| ; VBITS_GE_256-NEXT: .LBB14_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_256-NEXT: .LBB14_19: // %else58 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB14_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB14_6 |
| ; VBITS_GE_256-NEXT: .LBB14_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB14_7 |
| ; VBITS_GE_256-NEXT: .LBB14_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB14_8 |
| ; VBITS_GE_256-NEXT: .LBB14_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB14_9 |
| ; VBITS_GE_256-NEXT: .LBB14_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB14_10 |
| ; VBITS_GE_256-NEXT: .LBB14_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB14_11 |
| ; VBITS_GE_256-NEXT: .LBB14_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB14_12 |
| ; VBITS_GE_256-NEXT: .LBB14_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB14_13 |
| ; VBITS_GE_256-NEXT: .LBB14_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB14_14 |
| ; VBITS_GE_256-NEXT: .LBB14_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB14_15 |
| ; VBITS_GE_256-NEXT: .LBB14_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB14_16 |
| ; VBITS_GE_256-NEXT: .LBB14_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB14_17 |
| ; VBITS_GE_256-NEXT: .LBB14_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB14_18 |
| ; VBITS_GE_256-NEXT: b .LBB14_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v16i8i32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: ldr q0, [x1] |
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI14_0 |
| ; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] |
| ; VBITS_GE_512-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_512-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_512-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_512-NEXT: addv h0, v0.8h |
| ; VBITS_GE_512-NEXT: fmov w8, s0 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB14_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB14_3 |
| ; VBITS_GE_512-NEXT: b .LBB14_4 |
| ; VBITS_GE_512-NEXT: .LBB14_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB14_4 |
| ; VBITS_GE_512-NEXT: .LBB14_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB14_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB14_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB14_21 |
| ; VBITS_GE_512-NEXT: .LBB14_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB14_22 |
| ; VBITS_GE_512-NEXT: .LBB14_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB14_23 |
| ; VBITS_GE_512-NEXT: .LBB14_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB14_24 |
| ; VBITS_GE_512-NEXT: .LBB14_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB14_25 |
| ; VBITS_GE_512-NEXT: .LBB14_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB14_26 |
| ; VBITS_GE_512-NEXT: .LBB14_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB14_27 |
| ; VBITS_GE_512-NEXT: .LBB14_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB14_28 |
| ; VBITS_GE_512-NEXT: .LBB14_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB14_29 |
| ; VBITS_GE_512-NEXT: .LBB14_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB14_30 |
| ; VBITS_GE_512-NEXT: .LBB14_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB14_31 |
| ; VBITS_GE_512-NEXT: .LBB14_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB14_32 |
| ; VBITS_GE_512-NEXT: .LBB14_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB14_19 |
| ; VBITS_GE_512-NEXT: .LBB14_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_512-NEXT: .LBB14_19: // %else58 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB14_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB14_6 |
| ; VBITS_GE_512-NEXT: .LBB14_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB14_7 |
| ; VBITS_GE_512-NEXT: .LBB14_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB14_8 |
| ; VBITS_GE_512-NEXT: .LBB14_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB14_9 |
| ; VBITS_GE_512-NEXT: .LBB14_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB14_10 |
| ; VBITS_GE_512-NEXT: .LBB14_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB14_11 |
| ; VBITS_GE_512-NEXT: .LBB14_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB14_12 |
| ; VBITS_GE_512-NEXT: .LBB14_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB14_13 |
| ; VBITS_GE_512-NEXT: .LBB14_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB14_14 |
| ; VBITS_GE_512-NEXT: .LBB14_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB14_15 |
| ; VBITS_GE_512-NEXT: .LBB14_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB14_16 |
| ; VBITS_GE_512-NEXT: .LBB14_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB14_17 |
| ; VBITS_GE_512-NEXT: .LBB14_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB14_18 |
| ; VBITS_GE_512-NEXT: b .LBB14_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v16i8i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl16 |
| ; CHECK-EXPAND-NEXT: ldr q0, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; A lane is active when the corresponding byte read from %bp is zero. |
| %b = load <16 x i8>, ptr %bp |
| %mask = icmp eq <16 x i8> %b, zeroinitializer |
| ; Inactive lanes take the poison passthru, so their contents are unconstrained. |
| %load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison) |
| ; Widen i8 -> i32 with sign extension before the full-vector store to %c. |
| %ext = sext <16 x i8> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i8> followed by sign-extension to <8 x i64>. |
| ; The lane mask is (bytes loaded from %bp) == 0; @llvm.masked.expandload fills |
| ; the active lanes from %ap with a poison passthru, and the widened result is |
| ; stored to %c. Note: despite the "masked_load" name, the intrinsic exercised |
| ; here is expandload. |
| ; |
| ; Expected codegen, per RUN line: |
| ;  * 256-bit run: the mask is reduced to a scalar bitmask in w8 and each lane |
| ;    is loaded behind its own tbz/tbnz test with a post-incremented pointer; |
| ;    the bytes are widened to halfwords, split into two halves, and written |
| ;    with two 4-lane st1d. |
| ;  * 512-bit and 2048-bit runs: same per-lane branch sequence, but the whole |
| ;    result is widened in one register and written with a single 8-lane st1d. |
| ;  * sve2p2 run: branch-free; cntp + whilelo build a predicate for one |
| ;    contiguous ld1b, and an expand instruction is applied under the compare |
| ;    predicate before widening and the two 4-lane st1d stores. |
| define void @masked_load_sext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: ldr d0, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI15_0 |
| ; VBITS_GE_256-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] |
| ; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0 |
| ; VBITS_GE_256-NEXT: and v0.8b, v0.8b, v1.8b |
| ; VBITS_GE_256-NEXT: addv b0, v0.8b |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB15_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB15_3 |
| ; VBITS_GE_256-NEXT: b .LBB15_4 |
| ; VBITS_GE_256-NEXT: .LBB15_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB15_4 |
| ; VBITS_GE_256-NEXT: .LBB15_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB15_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB15_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB15_13 |
| ; VBITS_GE_256-NEXT: .LBB15_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB15_14 |
| ; VBITS_GE_256-NEXT: .LBB15_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB15_15 |
| ; VBITS_GE_256-NEXT: .LBB15_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB15_16 |
| ; VBITS_GE_256-NEXT: .LBB15_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB15_11 |
| ; VBITS_GE_256-NEXT: .LBB15_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB15_11: // %else26 |
| ; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB15_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB15_6 |
| ; VBITS_GE_256-NEXT: .LBB15_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB15_7 |
| ; VBITS_GE_256-NEXT: .LBB15_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB15_8 |
| ; VBITS_GE_256-NEXT: .LBB15_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB15_9 |
| ; VBITS_GE_256-NEXT: .LBB15_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB15_10 |
| ; VBITS_GE_256-NEXT: b .LBB15_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i8i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: ldr d0, [x1] |
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI15_0 |
| ; VBITS_GE_512-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] |
| ; VBITS_GE_512-NEXT: cmeq v0.8b, v0.8b, #0 |
| ; VBITS_GE_512-NEXT: and v0.8b, v0.8b, v1.8b |
| ; VBITS_GE_512-NEXT: addv b0, v0.8b |
| ; VBITS_GE_512-NEXT: fmov w8, s0 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB15_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB15_3 |
| ; VBITS_GE_512-NEXT: b .LBB15_4 |
| ; VBITS_GE_512-NEXT: .LBB15_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB15_4 |
| ; VBITS_GE_512-NEXT: .LBB15_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB15_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB15_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB15_13 |
| ; VBITS_GE_512-NEXT: .LBB15_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB15_14 |
| ; VBITS_GE_512-NEXT: .LBB15_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB15_15 |
| ; VBITS_GE_512-NEXT: .LBB15_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB15_16 |
| ; VBITS_GE_512-NEXT: .LBB15_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB15_11 |
| ; VBITS_GE_512-NEXT: .LBB15_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB15_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB15_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB15_6 |
| ; VBITS_GE_512-NEXT: .LBB15_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB15_7 |
| ; VBITS_GE_512-NEXT: .LBB15_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB15_8 |
| ; VBITS_GE_512-NEXT: .LBB15_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB15_9 |
| ; VBITS_GE_512-NEXT: .LBB15_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB15_10 |
| ; VBITS_GE_512-NEXT: b .LBB15_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i8i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl8 |
| ; CHECK-EXPAND-NEXT: ldr d0, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b |
| ; CHECK-EXPAND-NEXT: sshll v0.8h, v0.8b, #0 |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; A lane is active when the corresponding byte read from %bp is zero. |
| %b = load <8 x i8>, ptr %bp |
| %mask = icmp eq <8 x i8> %b, zeroinitializer |
| ; Inactive lanes take the poison passthru, so their contents are unconstrained. |
| %load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison) |
| ; Widen i8 -> i64 with sign extension before the full-vector store to %c. |
| %ext = sext <8 x i8> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <16 x i16> from %ap, sign extended to <16 x i32> and |
| ; stored to %c. The mask is the lane-wise compare of the <16 x i16> loaded |
| ; from %bp against zero, and the passthru operand is poison. |
| ; In the VBITS_GE_256 and VBITS_GE_512 checks the mask is packed into w8 and |
| ; every lane is tested with tbz/tbnz and loaded on its own (ld1rh for lane 0, |
| ; ldrh plus a predicated mov for the other lanes). The CHECK-EXPAND checks, |
| ; which belong to the sve2p2 invocation, instead show cntp, whilelo, a single |
| ; contiguous ld1h and an expand instruction. |
| ; The extended result is stored as two vl8 halves in the VBITS_GE_256 and |
| ; CHECK-EXPAND checks and as one vl16 store in the VBITS_GE_512 checks. |
| define void @masked_load_sext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p1.h, vl16 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; VBITS_GE_256-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_256-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_256-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_256-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_256-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB16_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB16_3 |
| ; VBITS_GE_256-NEXT: b .LBB16_4 |
| ; VBITS_GE_256-NEXT: .LBB16_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI16_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI16_0 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB16_4 |
| ; VBITS_GE_256-NEXT: .LBB16_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB16_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB16_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB16_21 |
| ; VBITS_GE_256-NEXT: .LBB16_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB16_22 |
| ; VBITS_GE_256-NEXT: .LBB16_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB16_23 |
| ; VBITS_GE_256-NEXT: .LBB16_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB16_24 |
| ; VBITS_GE_256-NEXT: .LBB16_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB16_25 |
| ; VBITS_GE_256-NEXT: .LBB16_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB16_26 |
| ; VBITS_GE_256-NEXT: .LBB16_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB16_27 |
| ; VBITS_GE_256-NEXT: .LBB16_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB16_28 |
| ; VBITS_GE_256-NEXT: .LBB16_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB16_29 |
| ; VBITS_GE_256-NEXT: .LBB16_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB16_30 |
| ; VBITS_GE_256-NEXT: .LBB16_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB16_31 |
| ; VBITS_GE_256-NEXT: .LBB16_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB16_32 |
| ; VBITS_GE_256-NEXT: .LBB16_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB16_19 |
| ; VBITS_GE_256-NEXT: .LBB16_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w8 |
| ; VBITS_GE_256-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB16_19: // %else58 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB16_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB16_6 |
| ; VBITS_GE_256-NEXT: .LBB16_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB16_7 |
| ; VBITS_GE_256-NEXT: .LBB16_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB16_8 |
| ; VBITS_GE_256-NEXT: .LBB16_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB16_9 |
| ; VBITS_GE_256-NEXT: .LBB16_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB16_10 |
| ; VBITS_GE_256-NEXT: .LBB16_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB16_11 |
| ; VBITS_GE_256-NEXT: .LBB16_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB16_12 |
| ; VBITS_GE_256-NEXT: .LBB16_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB16_13 |
| ; VBITS_GE_256-NEXT: .LBB16_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB16_14 |
| ; VBITS_GE_256-NEXT: .LBB16_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB16_15 |
| ; VBITS_GE_256-NEXT: .LBB16_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB16_16 |
| ; VBITS_GE_256-NEXT: .LBB16_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB16_17 |
| ; VBITS_GE_256-NEXT: .LBB16_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB16_18 |
| ; VBITS_GE_256-NEXT: b .LBB16_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v16i16i32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p1.h, vl16 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; VBITS_GE_512-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB16_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB16_3 |
| ; VBITS_GE_512-NEXT: b .LBB16_4 |
| ; VBITS_GE_512-NEXT: .LBB16_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI16_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI16_0 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB16_4 |
| ; VBITS_GE_512-NEXT: .LBB16_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB16_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB16_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB16_21 |
| ; VBITS_GE_512-NEXT: .LBB16_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB16_22 |
| ; VBITS_GE_512-NEXT: .LBB16_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB16_23 |
| ; VBITS_GE_512-NEXT: .LBB16_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB16_24 |
| ; VBITS_GE_512-NEXT: .LBB16_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB16_25 |
| ; VBITS_GE_512-NEXT: .LBB16_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB16_26 |
| ; VBITS_GE_512-NEXT: .LBB16_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB16_27 |
| ; VBITS_GE_512-NEXT: .LBB16_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB16_28 |
| ; VBITS_GE_512-NEXT: .LBB16_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB16_29 |
| ; VBITS_GE_512-NEXT: .LBB16_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB16_30 |
| ; VBITS_GE_512-NEXT: .LBB16_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB16_31 |
| ; VBITS_GE_512-NEXT: .LBB16_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB16_32 |
| ; VBITS_GE_512-NEXT: .LBB16_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB16_19 |
| ; VBITS_GE_512-NEXT: .LBB16_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w8 |
| ; VBITS_GE_512-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB16_19: // %else58 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB16_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB16_6 |
| ; VBITS_GE_512-NEXT: .LBB16_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB16_7 |
| ; VBITS_GE_512-NEXT: .LBB16_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB16_8 |
| ; VBITS_GE_512-NEXT: .LBB16_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB16_9 |
| ; VBITS_GE_512-NEXT: .LBB16_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB16_10 |
| ; VBITS_GE_512-NEXT: .LBB16_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB16_11 |
| ; VBITS_GE_512-NEXT: .LBB16_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB16_12 |
| ; VBITS_GE_512-NEXT: .LBB16_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB16_13 |
| ; VBITS_GE_512-NEXT: .LBB16_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB16_14 |
| ; VBITS_GE_512-NEXT: .LBB16_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB16_15 |
| ; VBITS_GE_512-NEXT: .LBB16_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB16_16 |
| ; VBITS_GE_512-NEXT: .LBB16_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB16_17 |
| ; VBITS_GE_512-NEXT: .LBB16_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB16_18 |
| ; VBITS_GE_512-NEXT: b .LBB16_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v16i16i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h |
| ; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <16 x i16>, ptr %bp |
| %mask = icmp eq <16 x i16> %b, zeroinitializer |
| %load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison) |
| %ext = sext <16 x i16> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i16> from %ap, sign extended to <8 x i64> and |
| ; stored to %c. The mask is the lane-wise compare of the <8 x i16> loaded |
| ; from %bp against zero, and the passthru operand is poison. |
| ; In the VBITS_GE_256 and VBITS_GE_512 checks the mask is reduced to a scalar |
| ; bitmask in w8 (cmeq, and with a constant-pool vector, addv) and every lane |
| ; is tested with tbz/tbnz and loaded on its own (ldrh plus fmov for lane 0, |
| ; a ld1 lane load for the other lanes). The CHECK-EXPAND checks, which belong |
| ; to the sve2p2 invocation, instead show cntp, whilelo, a single contiguous |
| ; ld1h and an expand instruction. |
| ; The extended result is stored as two vl4 halves in the VBITS_GE_256 and |
| ; CHECK-EXPAND checks and as one vl8 store in the VBITS_GE_512 checks. |
| define void @masked_load_sext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: ldr q0, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI17_0 |
| ; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] |
| ; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0 |
| ; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB17_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB17_3 |
| ; VBITS_GE_256-NEXT: b .LBB17_4 |
| ; VBITS_GE_256-NEXT: .LBB17_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB17_4 |
| ; VBITS_GE_256-NEXT: .LBB17_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_256-NEXT: .LBB17_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB17_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB17_13 |
| ; VBITS_GE_256-NEXT: .LBB17_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB17_14 |
| ; VBITS_GE_256-NEXT: .LBB17_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB17_15 |
| ; VBITS_GE_256-NEXT: .LBB17_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB17_16 |
| ; VBITS_GE_256-NEXT: .LBB17_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB17_11 |
| ; VBITS_GE_256-NEXT: .LBB17_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB17_11: // %else26 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB17_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB17_6 |
| ; VBITS_GE_256-NEXT: .LBB17_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB17_7 |
| ; VBITS_GE_256-NEXT: .LBB17_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB17_8 |
| ; VBITS_GE_256-NEXT: .LBB17_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB17_9 |
| ; VBITS_GE_256-NEXT: .LBB17_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB17_10 |
| ; VBITS_GE_256-NEXT: b .LBB17_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i16i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: ldr q0, [x1] |
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI17_0 |
| ; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] |
| ; VBITS_GE_512-NEXT: cmeq v0.8h, v0.8h, #0 |
| ; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_512-NEXT: addv h0, v0.8h |
| ; VBITS_GE_512-NEXT: fmov w8, s0 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB17_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB17_3 |
| ; VBITS_GE_512-NEXT: b .LBB17_4 |
| ; VBITS_GE_512-NEXT: .LBB17_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB17_4 |
| ; VBITS_GE_512-NEXT: .LBB17_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_512-NEXT: .LBB17_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB17_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB17_13 |
| ; VBITS_GE_512-NEXT: .LBB17_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB17_14 |
| ; VBITS_GE_512-NEXT: .LBB17_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB17_15 |
| ; VBITS_GE_512-NEXT: .LBB17_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB17_16 |
| ; VBITS_GE_512-NEXT: .LBB17_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB17_11 |
| ; VBITS_GE_512-NEXT: .LBB17_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB17_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB17_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB17_6 |
| ; VBITS_GE_512-NEXT: .LBB17_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB17_7 |
| ; VBITS_GE_512-NEXT: .LBB17_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB17_8 |
| ; VBITS_GE_512-NEXT: .LBB17_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB17_9 |
| ; VBITS_GE_512-NEXT: .LBB17_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB17_10 |
| ; VBITS_GE_512-NEXT: b .LBB17_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i16i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl8 |
| ; CHECK-EXPAND-NEXT: ldr q0, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h |
| ; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <8 x i16>, ptr %bp |
| %mask = icmp eq <8 x i16> %b, zeroinitializer |
| %load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison) |
| %ext = sext <8 x i16> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB18_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB18_3 |
| ; VBITS_GE_256-NEXT: b .LBB18_4 |
| ; VBITS_GE_256-NEXT: .LBB18_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI18_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI18_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB18_4 |
| ; VBITS_GE_256-NEXT: .LBB18_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB18_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB18_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB18_13 |
| ; VBITS_GE_256-NEXT: .LBB18_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB18_14 |
| ; VBITS_GE_256-NEXT: .LBB18_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB18_15 |
| ; VBITS_GE_256-NEXT: .LBB18_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB18_16 |
| ; VBITS_GE_256-NEXT: .LBB18_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB18_11 |
| ; VBITS_GE_256-NEXT: .LBB18_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB18_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB18_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB18_6 |
| ; VBITS_GE_256-NEXT: .LBB18_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB18_7 |
| ; VBITS_GE_256-NEXT: .LBB18_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB18_8 |
| ; VBITS_GE_256-NEXT: .LBB18_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB18_9 |
| ; VBITS_GE_256-NEXT: .LBB18_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB18_10 |
| ; VBITS_GE_256-NEXT: b .LBB18_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i32i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB18_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB18_3 |
| ; VBITS_GE_512-NEXT: b .LBB18_4 |
| ; VBITS_GE_512-NEXT: .LBB18_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI18_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI18_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB18_4 |
| ; VBITS_GE_512-NEXT: .LBB18_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB18_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB18_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB18_13 |
| ; VBITS_GE_512-NEXT: .LBB18_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB18_14 |
| ; VBITS_GE_512-NEXT: .LBB18_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB18_15 |
| ; VBITS_GE_512-NEXT: .LBB18_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB18_16 |
| ; VBITS_GE_512-NEXT: .LBB18_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB18_11 |
| ; VBITS_GE_512-NEXT: .LBB18_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB18_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB18_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB18_6 |
| ; VBITS_GE_512-NEXT: .LBB18_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB18_7 |
| ; VBITS_GE_512-NEXT: .LBB18_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB18_8 |
| ; VBITS_GE_512-NEXT: .LBB18_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB18_9 |
| ; VBITS_GE_512-NEXT: .LBB18_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB18_10 |
| ; VBITS_GE_512-NEXT: b .LBB18_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <8 x i32>, ptr %bp |
| %mask = icmp eq <8 x i32> %b, zeroinitializer |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| %ext = sext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_256-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_256-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_256-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_256-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_256-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_256-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_256-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_256-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_256-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_256-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_256-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_256-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_256-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_256-NEXT: ptrue p1.b, vl32 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; VBITS_GE_256-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.b |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[1] |
| ; VBITS_GE_256-NEXT: fmov w6, s0 |
| ; VBITS_GE_256-NEXT: umov w4, v0.b[7] |
| ; VBITS_GE_256-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[2] |
| ; VBITS_GE_256-NEXT: umov w3, v0.b[9] |
| ; VBITS_GE_256-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_256-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_256-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_256-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_256-NEXT: bfi w6, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[11] |
| ; VBITS_GE_256-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_256-NEXT: ubfiz w13, w4, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w5, #8, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_256-NEXT: fmov w20, s5 |
| ; VBITS_GE_256-NEXT: fmov w21, s6 |
| ; VBITS_GE_256-NEXT: bfi w6, w12, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w16, v0.b[12] |
| ; VBITS_GE_256-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_256-NEXT: fmov w22, s7 |
| ; VBITS_GE_256-NEXT: orr w12, w13, w4 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w3, #9, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[13] |
| ; VBITS_GE_256-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_256-NEXT: fmov w23, s16 |
| ; VBITS_GE_256-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w1, #10, #1 |
| ; VBITS_GE_256-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_256-NEXT: fmov w24, s17 |
| ; VBITS_GE_256-NEXT: ubfiz w3, w20, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w4, w21, #19, #1 |
| ; VBITS_GE_256-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w18, #11, #1 |
| ; VBITS_GE_256-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_256-NEXT: fmov w25, s18 |
| ; VBITS_GE_256-NEXT: ubfiz w1, w22, #20, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w12, w11 |
| ; VBITS_GE_256-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_256-NEXT: fmov w26, s19 |
| ; VBITS_GE_256-NEXT: orr w3, w3, w4 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w16, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w23, #21, #1 |
| ; VBITS_GE_256-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_256-NEXT: fmov w27, s20 |
| ; VBITS_GE_256-NEXT: orr w10, w3, w1 |
| ; VBITS_GE_256-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w9, w17, #13, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w24, #22, #1 |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_256-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_256-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_256-NEXT: fmov w5, s21 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w25, #23, #1 |
| ; VBITS_GE_256-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_256-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_256-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w26, #24, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_256-NEXT: fmov w28, s22 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_256-NEXT: fmov w7, s3 |
| ; VBITS_GE_256-NEXT: fmov w29, s23 |
| ; VBITS_GE_256-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w5, #26, #1 |
| ; VBITS_GE_256-NEXT: fmov w19, s4 |
| ; VBITS_GE_256-NEXT: fmov w30, s24 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w15, #15, #1 |
| ; VBITS_GE_256-NEXT: bfi w6, w8, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w8, w9, w12 |
| ; VBITS_GE_256-NEXT: orr w9, w10, w13 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w28, #27, #1 |
| ; VBITS_GE_256-NEXT: fmov w14, s2 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w29, #28, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w19, #17, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w30, #29, #1 |
| ; VBITS_GE_256-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_256-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w14, #30, #1 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w12 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: orr w8, w6, w8 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w11 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: fmov w9, s1 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB19_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB19_3 |
| ; VBITS_GE_256-NEXT: b .LBB19_4 |
| ; VBITS_GE_256-NEXT: .LBB19_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI19_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI19_0 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB19_4 |
| ; VBITS_GE_256-NEXT: .LBB19_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB19_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB19_36 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB19_37 |
| ; VBITS_GE_256-NEXT: .LBB19_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB19_38 |
| ; VBITS_GE_256-NEXT: .LBB19_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB19_39 |
| ; VBITS_GE_256-NEXT: .LBB19_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB19_40 |
| ; VBITS_GE_256-NEXT: .LBB19_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB19_41 |
| ; VBITS_GE_256-NEXT: .LBB19_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB19_42 |
| ; VBITS_GE_256-NEXT: .LBB19_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB19_43 |
| ; VBITS_GE_256-NEXT: .LBB19_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB19_44 |
| ; VBITS_GE_256-NEXT: .LBB19_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB19_45 |
| ; VBITS_GE_256-NEXT: .LBB19_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB19_46 |
| ; VBITS_GE_256-NEXT: .LBB19_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB19_47 |
| ; VBITS_GE_256-NEXT: .LBB19_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB19_48 |
| ; VBITS_GE_256-NEXT: .LBB19_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB19_49 |
| ; VBITS_GE_256-NEXT: .LBB19_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB19_50 |
| ; VBITS_GE_256-NEXT: .LBB19_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB19_51 |
| ; VBITS_GE_256-NEXT: .LBB19_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB19_52 |
| ; VBITS_GE_256-NEXT: .LBB19_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB19_53 |
| ; VBITS_GE_256-NEXT: .LBB19_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB19_54 |
| ; VBITS_GE_256-NEXT: .LBB19_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB19_55 |
| ; VBITS_GE_256-NEXT: .LBB19_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB19_56 |
| ; VBITS_GE_256-NEXT: .LBB19_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB19_57 |
| ; VBITS_GE_256-NEXT: .LBB19_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB19_58 |
| ; VBITS_GE_256-NEXT: .LBB19_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB19_59 |
| ; VBITS_GE_256-NEXT: .LBB19_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB19_60 |
| ; VBITS_GE_256-NEXT: .LBB19_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB19_61 |
| ; VBITS_GE_256-NEXT: .LBB19_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB19_62 |
| ; VBITS_GE_256-NEXT: .LBB19_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB19_63 |
| ; VBITS_GE_256-NEXT: .LBB19_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB19_64 |
| ; VBITS_GE_256-NEXT: .LBB19_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB19_35 |
| ; VBITS_GE_256-NEXT: .LBB19_34: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w8 |
| ; VBITS_GE_256-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB19_35: // %else122 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| ; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: add sp, sp, #112 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB19_36: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB19_6 |
| ; VBITS_GE_256-NEXT: .LBB19_37: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB19_7 |
| ; VBITS_GE_256-NEXT: .LBB19_38: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB19_8 |
| ; VBITS_GE_256-NEXT: .LBB19_39: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB19_9 |
| ; VBITS_GE_256-NEXT: .LBB19_40: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB19_10 |
| ; VBITS_GE_256-NEXT: .LBB19_41: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB19_11 |
| ; VBITS_GE_256-NEXT: .LBB19_42: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB19_12 |
| ; VBITS_GE_256-NEXT: .LBB19_43: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB19_13 |
| ; VBITS_GE_256-NEXT: .LBB19_44: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB19_14 |
| ; VBITS_GE_256-NEXT: .LBB19_45: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB19_15 |
| ; VBITS_GE_256-NEXT: .LBB19_46: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB19_16 |
| ; VBITS_GE_256-NEXT: .LBB19_47: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB19_17 |
| ; VBITS_GE_256-NEXT: .LBB19_48: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB19_18 |
| ; VBITS_GE_256-NEXT: .LBB19_49: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB19_19 |
| ; VBITS_GE_256-NEXT: .LBB19_50: // %cond.load61 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB19_20 |
| ; VBITS_GE_256-NEXT: .LBB19_51: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB19_21 |
| ; VBITS_GE_256-NEXT: .LBB19_52: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB19_22 |
| ; VBITS_GE_256-NEXT: .LBB19_53: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB19_23 |
| ; VBITS_GE_256-NEXT: .LBB19_54: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB19_24 |
| ; VBITS_GE_256-NEXT: .LBB19_55: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB19_25 |
| ; VBITS_GE_256-NEXT: .LBB19_56: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB19_26 |
| ; VBITS_GE_256-NEXT: .LBB19_57: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB19_27 |
| ; VBITS_GE_256-NEXT: .LBB19_58: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB19_28 |
| ; VBITS_GE_256-NEXT: .LBB19_59: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB19_29 |
| ; VBITS_GE_256-NEXT: .LBB19_60: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB19_30 |
| ; VBITS_GE_256-NEXT: .LBB19_61: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB19_31 |
| ; VBITS_GE_256-NEXT: .LBB19_62: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB19_32 |
| ; VBITS_GE_256-NEXT: .LBB19_63: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB19_33 |
| ; VBITS_GE_256-NEXT: .LBB19_64: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB19_34 |
| ; VBITS_GE_256-NEXT: b .LBB19_35 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v32i8i16: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p1.b, vl32 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; VBITS_GE_512-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.b |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[1] |
| ; VBITS_GE_512-NEXT: fmov w6, s0 |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_512-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: bfi w6, w13, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[11] |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: ubfiz w13, w4, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w5, #8, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: bfi w6, w12, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[12] |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: fmov w22, s7 |
| ; VBITS_GE_512-NEXT: orr w12, w13, w4 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w3, #9, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[13] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w23, s16 |
| ; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w1, #10, #1 |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w24, s17 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w20, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w21, #19, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w18, #11, #1 |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w25, s18 |
| ; VBITS_GE_512-NEXT: ubfiz w1, w22, #20, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w11 |
| ; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: fmov w26, s19 |
| ; VBITS_GE_512-NEXT: orr w3, w3, w4 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w16, #12, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w23, #21, #1 |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: fmov w27, s20 |
| ; VBITS_GE_512-NEXT: orr w10, w3, w1 |
| ; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w9, w17, #13, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w16, w24, #22, #1 |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[6] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w5, s21 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w25, #23, #1 |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w16 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w26, #24, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: fmov w28, s22 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_512-NEXT: fmov w7, s3 |
| ; VBITS_GE_512-NEXT: fmov w29, s23 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w5, #26, #1 |
| ; VBITS_GE_512-NEXT: fmov w19, s4 |
| ; VBITS_GE_512-NEXT: fmov w30, s24 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w8, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w9, w10, w13 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w28, #27, #1 |
| ; VBITS_GE_512-NEXT: fmov w14, s2 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w7, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w29, #28, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w19, #17, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w30, #29, #1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w14, #30, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w12 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w8, w6, w8 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w11 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB19_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB19_3 |
| ; VBITS_GE_512-NEXT: b .LBB19_4 |
| ; VBITS_GE_512-NEXT: .LBB19_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI19_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI19_0 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB19_4 |
| ; VBITS_GE_512-NEXT: .LBB19_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB19_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB19_36 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB19_37 |
| ; VBITS_GE_512-NEXT: .LBB19_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB19_38 |
| ; VBITS_GE_512-NEXT: .LBB19_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB19_39 |
| ; VBITS_GE_512-NEXT: .LBB19_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB19_40 |
| ; VBITS_GE_512-NEXT: .LBB19_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB19_41 |
| ; VBITS_GE_512-NEXT: .LBB19_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB19_42 |
| ; VBITS_GE_512-NEXT: .LBB19_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB19_43 |
| ; VBITS_GE_512-NEXT: .LBB19_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB19_44 |
| ; VBITS_GE_512-NEXT: .LBB19_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB19_45 |
| ; VBITS_GE_512-NEXT: .LBB19_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB19_46 |
| ; VBITS_GE_512-NEXT: .LBB19_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB19_47 |
| ; VBITS_GE_512-NEXT: .LBB19_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB19_48 |
| ; VBITS_GE_512-NEXT: .LBB19_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB19_49 |
| ; VBITS_GE_512-NEXT: .LBB19_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB19_50 |
| ; VBITS_GE_512-NEXT: .LBB19_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB19_51 |
| ; VBITS_GE_512-NEXT: .LBB19_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB19_52 |
| ; VBITS_GE_512-NEXT: .LBB19_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB19_53 |
| ; VBITS_GE_512-NEXT: .LBB19_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB19_54 |
| ; VBITS_GE_512-NEXT: .LBB19_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB19_55 |
| ; VBITS_GE_512-NEXT: .LBB19_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB19_56 |
| ; VBITS_GE_512-NEXT: .LBB19_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB19_57 |
| ; VBITS_GE_512-NEXT: .LBB19_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB19_58 |
| ; VBITS_GE_512-NEXT: .LBB19_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB19_59 |
| ; VBITS_GE_512-NEXT: .LBB19_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB19_60 |
| ; VBITS_GE_512-NEXT: .LBB19_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB19_61 |
| ; VBITS_GE_512-NEXT: .LBB19_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB19_62 |
| ; VBITS_GE_512-NEXT: .LBB19_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB19_63 |
| ; VBITS_GE_512-NEXT: .LBB19_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB19_64 |
| ; VBITS_GE_512-NEXT: .LBB19_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB19_35 |
| ; VBITS_GE_512-NEXT: .LBB19_34: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w8 |
| ; VBITS_GE_512-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB19_35: // %else122 |
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB19_36: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB19_6 |
| ; VBITS_GE_512-NEXT: .LBB19_37: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB19_7 |
| ; VBITS_GE_512-NEXT: .LBB19_38: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB19_8 |
| ; VBITS_GE_512-NEXT: .LBB19_39: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB19_9 |
| ; VBITS_GE_512-NEXT: .LBB19_40: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB19_10 |
| ; VBITS_GE_512-NEXT: .LBB19_41: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB19_11 |
| ; VBITS_GE_512-NEXT: .LBB19_42: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB19_12 |
| ; VBITS_GE_512-NEXT: .LBB19_43: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB19_13 |
| ; VBITS_GE_512-NEXT: .LBB19_44: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB19_14 |
| ; VBITS_GE_512-NEXT: .LBB19_45: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB19_15 |
| ; VBITS_GE_512-NEXT: .LBB19_46: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB19_16 |
| ; VBITS_GE_512-NEXT: .LBB19_47: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB19_17 |
| ; VBITS_GE_512-NEXT: .LBB19_48: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB19_18 |
| ; VBITS_GE_512-NEXT: .LBB19_49: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB19_19 |
| ; VBITS_GE_512-NEXT: .LBB19_50: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB19_20 |
| ; VBITS_GE_512-NEXT: .LBB19_51: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB19_21 |
| ; VBITS_GE_512-NEXT: .LBB19_52: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB19_22 |
| ; VBITS_GE_512-NEXT: .LBB19_53: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB19_23 |
| ; VBITS_GE_512-NEXT: .LBB19_54: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB19_24 |
| ; VBITS_GE_512-NEXT: .LBB19_55: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB19_25 |
| ; VBITS_GE_512-NEXT: .LBB19_56: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB19_26 |
| ; VBITS_GE_512-NEXT: .LBB19_57: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB19_27 |
| ; VBITS_GE_512-NEXT: .LBB19_58: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB19_28 |
| ; VBITS_GE_512-NEXT: .LBB19_59: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB19_29 |
| ; VBITS_GE_512-NEXT: .LBB19_60: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB19_30 |
| ; VBITS_GE_512-NEXT: .LBB19_61: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB19_31 |
| ; VBITS_GE_512-NEXT: .LBB19_62: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB19_32 |
| ; VBITS_GE_512-NEXT: .LBB19_63: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB19_33 |
| ; VBITS_GE_512-NEXT: .LBB19_64: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB19_34 |
| ; VBITS_GE_512-NEXT: b .LBB19_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl32 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i8>, ptr %bp |
| %mask = icmp eq <32 x i8> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = zext <32 x i8> %load to <32 x i16> |
| store <32 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <16 x i8>, zero-extended to <16 x i32> and stored to %c. |
| ; The lane mask is computed in-function as (load of %bp) == 0 and the |
| ; passthrough operand is poison. |
| ; NOTE(review): despite the "masked_load" name, the intrinsic exercised here is |
| ; llvm.masked.expandload. |
| ; Lowering expected by the assertions below: the plain 256-bit and 512-bit-and-up |
| ; runs reduce the mask to a 16-bit lane bitmask in w8 and test it bit by bit |
| ; (tbz/tbnz), loading one byte per active lane with x0 post-incremented between |
| ; lanes; the sve2p2 run instead uses cntp + whilelo + ld1b followed by a single |
| ; expand. The 256-bit and sve2p2 runs store the result as two 8-lane halves, |
| ; while the 512-bit-and-up runs use one 16-lane store. |
| define void @masked_load_zext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: ldr q0, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI20_0 |
| ; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] |
| ; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB20_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB20_3 |
| ; VBITS_GE_256-NEXT: b .LBB20_4 |
| ; VBITS_GE_256-NEXT: .LBB20_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB20_4 |
| ; VBITS_GE_256-NEXT: .LBB20_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB20_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB20_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB20_21 |
| ; VBITS_GE_256-NEXT: .LBB20_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB20_22 |
| ; VBITS_GE_256-NEXT: .LBB20_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB20_23 |
| ; VBITS_GE_256-NEXT: .LBB20_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB20_24 |
| ; VBITS_GE_256-NEXT: .LBB20_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB20_25 |
| ; VBITS_GE_256-NEXT: .LBB20_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB20_26 |
| ; VBITS_GE_256-NEXT: .LBB20_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB20_27 |
| ; VBITS_GE_256-NEXT: .LBB20_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB20_28 |
| ; VBITS_GE_256-NEXT: .LBB20_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB20_29 |
| ; VBITS_GE_256-NEXT: .LBB20_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB20_30 |
| ; VBITS_GE_256-NEXT: .LBB20_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB20_31 |
| ; VBITS_GE_256-NEXT: .LBB20_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB20_32 |
| ; VBITS_GE_256-NEXT: .LBB20_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB20_19 |
| ; VBITS_GE_256-NEXT: .LBB20_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_256-NEXT: .LBB20_19: // %else58 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB20_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB20_6 |
| ; VBITS_GE_256-NEXT: .LBB20_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB20_7 |
| ; VBITS_GE_256-NEXT: .LBB20_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB20_8 |
| ; VBITS_GE_256-NEXT: .LBB20_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB20_9 |
| ; VBITS_GE_256-NEXT: .LBB20_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB20_10 |
| ; VBITS_GE_256-NEXT: .LBB20_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB20_11 |
| ; VBITS_GE_256-NEXT: .LBB20_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB20_12 |
| ; VBITS_GE_256-NEXT: .LBB20_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB20_13 |
| ; VBITS_GE_256-NEXT: .LBB20_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB20_14 |
| ; VBITS_GE_256-NEXT: .LBB20_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB20_15 |
| ; VBITS_GE_256-NEXT: .LBB20_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB20_16 |
| ; VBITS_GE_256-NEXT: .LBB20_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB20_17 |
| ; VBITS_GE_256-NEXT: .LBB20_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB20_18 |
| ; VBITS_GE_256-NEXT: b .LBB20_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v16i8i32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: ldr q0, [x1] |
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI20_0 |
| ; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] |
| ; VBITS_GE_512-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_512-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_512-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_512-NEXT: addv h0, v0.8h |
| ; VBITS_GE_512-NEXT: fmov w8, s0 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB20_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB20_3 |
| ; VBITS_GE_512-NEXT: b .LBB20_4 |
| ; VBITS_GE_512-NEXT: .LBB20_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB20_4 |
| ; VBITS_GE_512-NEXT: .LBB20_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB20_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB20_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB20_21 |
| ; VBITS_GE_512-NEXT: .LBB20_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB20_22 |
| ; VBITS_GE_512-NEXT: .LBB20_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB20_23 |
| ; VBITS_GE_512-NEXT: .LBB20_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB20_24 |
| ; VBITS_GE_512-NEXT: .LBB20_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB20_25 |
| ; VBITS_GE_512-NEXT: .LBB20_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB20_26 |
| ; VBITS_GE_512-NEXT: .LBB20_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB20_27 |
| ; VBITS_GE_512-NEXT: .LBB20_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB20_28 |
| ; VBITS_GE_512-NEXT: .LBB20_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB20_29 |
| ; VBITS_GE_512-NEXT: .LBB20_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB20_30 |
| ; VBITS_GE_512-NEXT: .LBB20_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB20_31 |
| ; VBITS_GE_512-NEXT: .LBB20_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB20_32 |
| ; VBITS_GE_512-NEXT: .LBB20_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB20_19 |
| ; VBITS_GE_512-NEXT: .LBB20_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_512-NEXT: .LBB20_19: // %else58 |
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB20_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB20_6 |
| ; VBITS_GE_512-NEXT: .LBB20_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB20_7 |
| ; VBITS_GE_512-NEXT: .LBB20_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB20_8 |
| ; VBITS_GE_512-NEXT: .LBB20_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB20_9 |
| ; VBITS_GE_512-NEXT: .LBB20_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB20_10 |
| ; VBITS_GE_512-NEXT: .LBB20_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB20_11 |
| ; VBITS_GE_512-NEXT: .LBB20_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB20_12 |
| ; VBITS_GE_512-NEXT: .LBB20_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB20_13 |
| ; VBITS_GE_512-NEXT: .LBB20_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB20_14 |
| ; VBITS_GE_512-NEXT: .LBB20_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB20_15 |
| ; VBITS_GE_512-NEXT: .LBB20_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB20_16 |
| ; VBITS_GE_512-NEXT: .LBB20_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB20_17 |
| ; VBITS_GE_512-NEXT: .LBB20_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB20_18 |
| ; VBITS_GE_512-NEXT: b .LBB20_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v16i8i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl16 |
| ; CHECK-EXPAND-NEXT: ldr q0, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b |
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <16 x i8>, ptr %bp |
| %mask = icmp eq <16 x i8> %b, zeroinitializer |
| %load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison) |
| %ext = zext <16 x i8> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 { ; expanding load of <8 x i8> from %ap, zero-extended to <8 x i64> and stored to %c
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64:
| ; VBITS_GE_256: // %bb.0:
| ; VBITS_GE_256-NEXT: ldr d0, [x1]
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI21_0
| ; VBITS_GE_256-NEXT: ldr d1, [x8, :lo12:.LCPI21_0]
| ; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0
| ; VBITS_GE_256-NEXT: and v0.8b, v0.8b, v1.8b
| ; VBITS_GE_256-NEXT: addv b0, v0.8b
| ; VBITS_GE_256-NEXT: fmov w8, s0
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB21_2
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
| ; VBITS_GE_256-NEXT: fmov s0, w9
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB21_3
| ; VBITS_GE_256-NEXT: b .LBB21_4
| ; VBITS_GE_256-NEXT: .LBB21_2:
| ; VBITS_GE_256-NEXT: // implicit-def: $d0
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB21_4
| ; VBITS_GE_256-NEXT: .LBB21_3: // %cond.load1
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
| ; VBITS_GE_256-NEXT: .LBB21_4: // %else2
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB21_12
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB21_13
| ; VBITS_GE_256-NEXT: .LBB21_6: // %else10
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB21_14
| ; VBITS_GE_256-NEXT: .LBB21_7: // %else14
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB21_15
| ; VBITS_GE_256-NEXT: .LBB21_8: // %else18
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB21_16
| ; VBITS_GE_256-NEXT: .LBB21_9: // %else22
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB21_11
| ; VBITS_GE_256-NEXT: .LBB21_10: // %cond.load25
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0]
| ; VBITS_GE_256-NEXT: .LBB21_11: // %else26
| ; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
| ; VBITS_GE_256-NEXT: ret
| ; VBITS_GE_256-NEXT: .LBB21_12: // %cond.load5
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB21_6
| ; VBITS_GE_256-NEXT: .LBB21_13: // %cond.load9
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB21_7
| ; VBITS_GE_256-NEXT: .LBB21_14: // %cond.load13
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB21_8
| ; VBITS_GE_256-NEXT: .LBB21_15: // %cond.load17
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB21_9
| ; VBITS_GE_256-NEXT: .LBB21_16: // %cond.load21
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB21_10
| ; VBITS_GE_256-NEXT: b .LBB21_11
| ;
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i8i64:
| ; VBITS_GE_512: // %bb.0:
| ; VBITS_GE_512-NEXT: ldr d0, [x1]
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI21_0
| ; VBITS_GE_512-NEXT: ldr d1, [x8, :lo12:.LCPI21_0]
| ; VBITS_GE_512-NEXT: cmeq v0.8b, v0.8b, #0
| ; VBITS_GE_512-NEXT: and v0.8b, v0.8b, v1.8b
| ; VBITS_GE_512-NEXT: addv b0, v0.8b
| ; VBITS_GE_512-NEXT: fmov w8, s0
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB21_2
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
| ; VBITS_GE_512-NEXT: fmov s0, w9
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB21_3
| ; VBITS_GE_512-NEXT: b .LBB21_4
| ; VBITS_GE_512-NEXT: .LBB21_2:
| ; VBITS_GE_512-NEXT: // implicit-def: $d0
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB21_4
| ; VBITS_GE_512-NEXT: .LBB21_3: // %cond.load1
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
| ; VBITS_GE_512-NEXT: .LBB21_4: // %else2
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB21_12
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB21_13
| ; VBITS_GE_512-NEXT: .LBB21_6: // %else10
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB21_14
| ; VBITS_GE_512-NEXT: .LBB21_7: // %else14
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB21_15
| ; VBITS_GE_512-NEXT: .LBB21_8: // %else18
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB21_16
| ; VBITS_GE_512-NEXT: .LBB21_9: // %else22
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB21_11
| ; VBITS_GE_512-NEXT: .LBB21_10: // %cond.load25
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0]
| ; VBITS_GE_512-NEXT: .LBB21_11: // %else26
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
| ; VBITS_GE_512-NEXT: ret
| ; VBITS_GE_512-NEXT: .LBB21_12: // %cond.load5
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB21_6
| ; VBITS_GE_512-NEXT: .LBB21_13: // %cond.load9
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB21_7
| ; VBITS_GE_512-NEXT: .LBB21_14: // %cond.load13
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB21_8
| ; VBITS_GE_512-NEXT: .LBB21_15: // %cond.load17
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB21_9
| ; VBITS_GE_512-NEXT: .LBB21_16: // %cond.load21
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB21_10
| ; VBITS_GE_512-NEXT: b .LBB21_11
| ;
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i8i64:
| ; CHECK-EXPAND: // %bb.0:
| ; CHECK-EXPAND-NEXT: ptrue p0.b, vl8
| ; CHECK-EXPAND-NEXT: ldr d0, [x1]
| ; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
| ; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
| ; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
| ; CHECK-EXPAND-NEXT: ushll v0.8h, v0.8b, #0
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
| ; CHECK-EXPAND-NEXT: ret
| %b = load <8 x i8>, ptr %bp ; vector that is compared against zero to build the mask
| %mask = icmp eq <8 x i8> %b, zeroinitializer ; a lane is enabled where %b == 0
| %load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison) ; NOTE(review): test is named masked_load_* but exercises masked.expandload; disabled lanes take the poison passthru
| %ext = zext <8 x i8> %load to <8 x i64> ; widen each i8 element to i64
| store <8 x i64> %ext, ptr %c ; write the widened vector to %c
| ret void
| }
| |
| define void @masked_load_zext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 { ; expanding load of <16 x i16> from %ap, zero-extended to <16 x i32> and stored to %c
| ; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32:
| ; VBITS_GE_256: // %bb.0:
| ; VBITS_GE_256-NEXT: sub sp, sp, #16
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
| ; VBITS_GE_256-NEXT: ptrue p1.h, vl16
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x1]
| ; VBITS_GE_256-NEXT: cmpeq p0.h, p1/z, z0.h, #0
| ; VBITS_GE_256-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
| ; VBITS_GE_256-NEXT: ptrue p0.h
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0]
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1]
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2]
| ; VBITS_GE_256-NEXT: umov w11, v0.b[7]
| ; VBITS_GE_256-NEXT: umov w12, v0.b[8]
| ; VBITS_GE_256-NEXT: umov w13, v0.b[3]
| ; VBITS_GE_256-NEXT: umov w14, v0.b[4]
| ; VBITS_GE_256-NEXT: umov w15, v0.b[10]
| ; VBITS_GE_256-NEXT: umov w16, v0.b[5]
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
| ; VBITS_GE_256-NEXT: umov w9, v0.b[9]
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #7, #1
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #8, #1
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #10, #1
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
| ; VBITS_GE_256-NEXT: umov w10, v0.b[11]
| ; VBITS_GE_256-NEXT: orr w11, w11, w12
| ; VBITS_GE_256-NEXT: umov w12, v0.b[13]
| ; VBITS_GE_256-NEXT: bfi w8, w13, #3, #1
| ; VBITS_GE_256-NEXT: umov w13, v0.b[12]
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #9, #1
| ; VBITS_GE_256-NEXT: bfi w8, w14, #4, #1
| ; VBITS_GE_256-NEXT: umov w14, v0.b[14]
| ; VBITS_GE_256-NEXT: orr w9, w11, w9
| ; VBITS_GE_256-NEXT: umov w11, v0.b[6]
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #11, #1
| ; VBITS_GE_256-NEXT: orr w9, w9, w15
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #12, #1
| ; VBITS_GE_256-NEXT: bfi w8, w16, #5, #1
| ; VBITS_GE_256-NEXT: orr w9, w9, w10
| ; VBITS_GE_256-NEXT: ubfiz w10, w12, #13, #1
| ; VBITS_GE_256-NEXT: orr w9, w9, w13
| ; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1
| ; VBITS_GE_256-NEXT: umov w13, v0.b[15]
| ; VBITS_GE_256-NEXT: bfi w8, w11, #6, #1
| ; VBITS_GE_256-NEXT: orr w9, w9, w10
| ; VBITS_GE_256-NEXT: orr w9, w9, w12
| ; VBITS_GE_256-NEXT: orr w8, w8, w9
| ; VBITS_GE_256-NEXT: orr w9, w8, w13, lsl #15
| ; VBITS_GE_256-NEXT: and w8, w9, #0xffff
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB22_2
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_256-NEXT: ld1rh { z0.h }, p0/z, [x0]
| ; VBITS_GE_256-NEXT: add x0, x0, #2
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB22_3
| ; VBITS_GE_256-NEXT: b .LBB22_4
| ; VBITS_GE_256-NEXT: .LBB22_2:
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI22_0
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI22_0
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x9]
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB22_4
| ; VBITS_GE_256-NEXT: .LBB22_3: // %cond.load1
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: .LBB22_4: // %else2
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB22_20
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB22_21
| ; VBITS_GE_256-NEXT: .LBB22_6: // %else10
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB22_22
| ; VBITS_GE_256-NEXT: .LBB22_7: // %else14
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB22_23
| ; VBITS_GE_256-NEXT: .LBB22_8: // %else18
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB22_24
| ; VBITS_GE_256-NEXT: .LBB22_9: // %else22
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB22_25
| ; VBITS_GE_256-NEXT: .LBB22_10: // %else26
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB22_26
| ; VBITS_GE_256-NEXT: .LBB22_11: // %else30
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB22_27
| ; VBITS_GE_256-NEXT: .LBB22_12: // %else34
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB22_28
| ; VBITS_GE_256-NEXT: .LBB22_13: // %else38
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB22_29
| ; VBITS_GE_256-NEXT: .LBB22_14: // %else42
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB22_30
| ; VBITS_GE_256-NEXT: .LBB22_15: // %else46
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB22_31
| ; VBITS_GE_256-NEXT: .LBB22_16: // %else50
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB22_32
| ; VBITS_GE_256-NEXT: .LBB22_17: // %else54
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB22_19
| ; VBITS_GE_256-NEXT: .LBB22_18: // %cond.load57
| ; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w8
| ; VBITS_GE_256-NEXT: ldrh w8, [x0]
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w8
| ; VBITS_GE_256-NEXT: .LBB22_19: // %else58
| ; VBITS_GE_256-NEXT: movprfx z1, z0
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
| ; VBITS_GE_256-NEXT: add sp, sp, #16
| ; VBITS_GE_256-NEXT: ret
| ; VBITS_GE_256-NEXT: .LBB22_20: // %cond.load5
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB22_6
| ; VBITS_GE_256-NEXT: .LBB22_21: // %cond.load9
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB22_7
| ; VBITS_GE_256-NEXT: .LBB22_22: // %cond.load13
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB22_8
| ; VBITS_GE_256-NEXT: .LBB22_23: // %cond.load17
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB22_9
| ; VBITS_GE_256-NEXT: .LBB22_24: // %cond.load21
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB22_10
| ; VBITS_GE_256-NEXT: .LBB22_25: // %cond.load25
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB22_11
| ; VBITS_GE_256-NEXT: .LBB22_26: // %cond.load29
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB22_12
| ; VBITS_GE_256-NEXT: .LBB22_27: // %cond.load33
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB22_13
| ; VBITS_GE_256-NEXT: .LBB22_28: // %cond.load37
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB22_14
| ; VBITS_GE_256-NEXT: .LBB22_29: // %cond.load41
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB22_15
| ; VBITS_GE_256-NEXT: .LBB22_30: // %cond.load45
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB22_16
| ; VBITS_GE_256-NEXT: .LBB22_31: // %cond.load49
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB22_17
| ; VBITS_GE_256-NEXT: .LBB22_32: // %cond.load53
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1
| ; VBITS_GE_256-NEXT: mov z2.h, w9
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB22_18
| ; VBITS_GE_256-NEXT: b .LBB22_19
| ;
| ; VBITS_GE_512-LABEL: masked_load_zext_v16i16i32:
| ; VBITS_GE_512: // %bb.0:
| ; VBITS_GE_512-NEXT: sub sp, sp, #16
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
| ; VBITS_GE_512-NEXT: ptrue p1.h, vl16
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x1]
| ; VBITS_GE_512-NEXT: cmpeq p0.h, p1/z, z0.h, #0
| ; VBITS_GE_512-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
| ; VBITS_GE_512-NEXT: ptrue p0.h
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0]
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1]
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2]
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7]
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8]
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3]
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4]
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10]
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5]
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9]
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11]
| ; VBITS_GE_512-NEXT: orr w11, w11, w12
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13]
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12]
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14]
| ; VBITS_GE_512-NEXT: orr w9, w11, w9
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6]
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
| ; VBITS_GE_512-NEXT: orr w9, w9, w15
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
| ; VBITS_GE_512-NEXT: orr w9, w9, w10
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
| ; VBITS_GE_512-NEXT: orr w9, w9, w13
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15]
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
| ; VBITS_GE_512-NEXT: orr w9, w9, w10
| ; VBITS_GE_512-NEXT: orr w9, w9, w12
| ; VBITS_GE_512-NEXT: orr w8, w8, w9
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB22_2
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_512-NEXT: ld1rh { z0.h }, p0/z, [x0]
| ; VBITS_GE_512-NEXT: add x0, x0, #2
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB22_3
| ; VBITS_GE_512-NEXT: b .LBB22_4
| ; VBITS_GE_512-NEXT: .LBB22_2:
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI22_0
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI22_0
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x9]
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB22_4
| ; VBITS_GE_512-NEXT: .LBB22_3: // %cond.load1
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: .LBB22_4: // %else2
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB22_20
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB22_21
| ; VBITS_GE_512-NEXT: .LBB22_6: // %else10
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB22_22
| ; VBITS_GE_512-NEXT: .LBB22_7: // %else14
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB22_23
| ; VBITS_GE_512-NEXT: .LBB22_8: // %else18
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB22_24
| ; VBITS_GE_512-NEXT: .LBB22_9: // %else22
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB22_25
| ; VBITS_GE_512-NEXT: .LBB22_10: // %else26
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB22_26
| ; VBITS_GE_512-NEXT: .LBB22_11: // %else30
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB22_27
| ; VBITS_GE_512-NEXT: .LBB22_12: // %else34
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB22_28
| ; VBITS_GE_512-NEXT: .LBB22_13: // %else38
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB22_29
| ; VBITS_GE_512-NEXT: .LBB22_14: // %else42
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB22_30
| ; VBITS_GE_512-NEXT: .LBB22_15: // %else46
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB22_31
| ; VBITS_GE_512-NEXT: .LBB22_16: // %else50
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB22_32
| ; VBITS_GE_512-NEXT: .LBB22_17: // %else54
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB22_19
| ; VBITS_GE_512-NEXT: .LBB22_18: // %cond.load57
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w8
| ; VBITS_GE_512-NEXT: ldrh w8, [x0]
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w8
| ; VBITS_GE_512-NEXT: .LBB22_19: // %else58
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
| ; VBITS_GE_512-NEXT: add sp, sp, #16
| ; VBITS_GE_512-NEXT: ret
| ; VBITS_GE_512-NEXT: .LBB22_20: // %cond.load5
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB22_6
| ; VBITS_GE_512-NEXT: .LBB22_21: // %cond.load9
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB22_7
| ; VBITS_GE_512-NEXT: .LBB22_22: // %cond.load13
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB22_8
| ; VBITS_GE_512-NEXT: .LBB22_23: // %cond.load17
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB22_9
| ; VBITS_GE_512-NEXT: .LBB22_24: // %cond.load21
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB22_10
| ; VBITS_GE_512-NEXT: .LBB22_25: // %cond.load25
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB22_11
| ; VBITS_GE_512-NEXT: .LBB22_26: // %cond.load29
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB22_12
| ; VBITS_GE_512-NEXT: .LBB22_27: // %cond.load33
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB22_13
| ; VBITS_GE_512-NEXT: .LBB22_28: // %cond.load37
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB22_14
| ; VBITS_GE_512-NEXT: .LBB22_29: // %cond.load41
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB22_15
| ; VBITS_GE_512-NEXT: .LBB22_30: // %cond.load45
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB22_16
| ; VBITS_GE_512-NEXT: .LBB22_31: // %cond.load49
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB22_17
| ; VBITS_GE_512-NEXT: .LBB22_32: // %cond.load53
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1
| ; VBITS_GE_512-NEXT: mov z2.h, w9
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
| ; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB22_18
| ; VBITS_GE_512-NEXT: b .LBB22_19
| ;
| ; CHECK-EXPAND-LABEL: masked_load_zext_v16i16i32:
| ; CHECK-EXPAND: // %bb.0:
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1]
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
| ; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
| ; CHECK-EXPAND-NEXT: movprfx z1, z0
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
| ; CHECK-EXPAND-NEXT: ret
| %b = load <16 x i16>, ptr %bp ; vector that is compared against zero to build the mask
| %mask = icmp eq <16 x i16> %b, zeroinitializer ; a lane is enabled where %b == 0
| %load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison) ; NOTE(review): test is named masked_load_* but exercises masked.expandload; disabled lanes take the poison passthru
| %ext = zext <16 x i16> %load to <16 x i32> ; widen each i16 element to i32
| store <16 x i32> %ext, ptr %c ; write the widened vector to %c
| ret void
| }
| |
| define void @masked_load_zext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 { ; expanding load of <8 x i16> from %ap, zero-extended to <8 x i64> and stored to %c
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64:
| ; VBITS_GE_256: // %bb.0:
| ; VBITS_GE_256-NEXT: ldr q0, [x1]
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI23_0
| ; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
| ; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
| ; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b
| ; VBITS_GE_256-NEXT: addv h0, v0.8h
| ; VBITS_GE_256-NEXT: fmov w8, s0
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB23_2
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_256-NEXT: fmov s0, w9
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB23_3
| ; VBITS_GE_256-NEXT: b .LBB23_4
| ; VBITS_GE_256-NEXT: .LBB23_2:
| ; VBITS_GE_256-NEXT: // implicit-def: $q0
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB23_4
| ; VBITS_GE_256-NEXT: .LBB23_3: // %cond.load1
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2
| ; VBITS_GE_256-NEXT: .LBB23_4: // %else2
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB23_12
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB23_13
| ; VBITS_GE_256-NEXT: .LBB23_6: // %else10
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB23_14
| ; VBITS_GE_256-NEXT: .LBB23_7: // %else14
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB23_15
| ; VBITS_GE_256-NEXT: .LBB23_8: // %else18
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB23_16
| ; VBITS_GE_256-NEXT: .LBB23_9: // %else22
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB23_11
| ; VBITS_GE_256-NEXT: .LBB23_10: // %cond.load25
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0]
| ; VBITS_GE_256-NEXT: .LBB23_11: // %else26
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
| ; VBITS_GE_256-NEXT: ret
| ; VBITS_GE_256-NEXT: .LBB23_12: // %cond.load5
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB23_6
| ; VBITS_GE_256-NEXT: .LBB23_13: // %cond.load9
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB23_7
| ; VBITS_GE_256-NEXT: .LBB23_14: // %cond.load13
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB23_8
| ; VBITS_GE_256-NEXT: .LBB23_15: // %cond.load17
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB23_9
| ; VBITS_GE_256-NEXT: .LBB23_16: // %cond.load21
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB23_10
| ; VBITS_GE_256-NEXT: b .LBB23_11
| ;
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i16i64:
| ; VBITS_GE_512: // %bb.0:
| ; VBITS_GE_512-NEXT: ldr q0, [x1]
| ; VBITS_GE_512-NEXT: adrp x8, .LCPI23_0
| ; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
| ; VBITS_GE_512-NEXT: cmeq v0.8h, v0.8h, #0
| ; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b
| ; VBITS_GE_512-NEXT: addv h0, v0.8h
| ; VBITS_GE_512-NEXT: fmov w8, s0
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB23_2
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
| ; VBITS_GE_512-NEXT: fmov s0, w9
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB23_3
| ; VBITS_GE_512-NEXT: b .LBB23_4
| ; VBITS_GE_512-NEXT: .LBB23_2:
| ; VBITS_GE_512-NEXT: // implicit-def: $q0
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB23_4
| ; VBITS_GE_512-NEXT: .LBB23_3: // %cond.load1
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2
| ; VBITS_GE_512-NEXT: .LBB23_4: // %else2
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB23_12
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB23_13
| ; VBITS_GE_512-NEXT: .LBB23_6: // %else10
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB23_14
| ; VBITS_GE_512-NEXT: .LBB23_7: // %else14
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB23_15
| ; VBITS_GE_512-NEXT: .LBB23_8: // %else18
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB23_16
| ; VBITS_GE_512-NEXT: .LBB23_9: // %else22
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB23_11
| ; VBITS_GE_512-NEXT: .LBB23_10: // %cond.load25
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0]
| ; VBITS_GE_512-NEXT: .LBB23_11: // %else26
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
| ; VBITS_GE_512-NEXT: ret
| ; VBITS_GE_512-NEXT: .LBB23_12: // %cond.load5
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB23_6
| ; VBITS_GE_512-NEXT: .LBB23_13: // %cond.load9
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB23_7
| ; VBITS_GE_512-NEXT: .LBB23_14: // %cond.load13
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB23_8
| ; VBITS_GE_512-NEXT: .LBB23_15: // %cond.load17
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB23_9
| ; VBITS_GE_512-NEXT: .LBB23_16: // %cond.load21
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB23_10
| ; VBITS_GE_512-NEXT: b .LBB23_11
| ;
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i16i64:
| ; CHECK-EXPAND: // %bb.0:
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl8
| ; CHECK-EXPAND-NEXT: ldr q0, [x1]
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
| ; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
| ; CHECK-EXPAND-NEXT: ret
| %b = load <8 x i16>, ptr %bp ; vector that is compared against zero to build the mask
| %mask = icmp eq <8 x i16> %b, zeroinitializer ; a lane is enabled where %b == 0
| %load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison) ; NOTE(review): test is named masked_load_* but exercises masked.expandload; disabled lanes take the poison passthru
| %ext = zext <8 x i16> %load to <8 x i64> ; widen each i16 element to i64
| store <8 x i64> %ext, ptr %c ; write the widened vector to %c
| ret void
| }
| |
| ; Expanding load of <8 x i32> followed by a zero-extend to <8 x i64>. |
| ; The mask is built from the lanes of the vector at %bp that compare equal to |
| ; zero, the passthru operand is poison, and the widened result is stored to %c. |
| ; Although the function is named masked_load_*, the intrinsic called is |
| ; @llvm.masked.expandload.v8i32. |
| ; The VBITS_GE_256 and VBITS_GE_512 expectations test one mask bit per lane |
| ; (tbz/tbnz) and load the enabled elements one at a time, whereas the |
| ; CHECK-EXPAND expectations use cntp + whilelo + ld1w followed by expand. |
| ; The assertions below are autogenerated (see the note at the top of the file), |
| ; so regenerate them with utils/update_llc_test_checks.py instead of editing. |
| define void @masked_load_zext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB24_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB24_3 |
| ; VBITS_GE_256-NEXT: b .LBB24_4 |
| ; VBITS_GE_256-NEXT: .LBB24_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI24_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI24_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB24_4 |
| ; VBITS_GE_256-NEXT: .LBB24_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB24_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB24_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB24_13 |
| ; VBITS_GE_256-NEXT: .LBB24_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB24_14 |
| ; VBITS_GE_256-NEXT: .LBB24_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB24_15 |
| ; VBITS_GE_256-NEXT: .LBB24_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB24_16 |
| ; VBITS_GE_256-NEXT: .LBB24_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB24_11 |
| ; VBITS_GE_256-NEXT: .LBB24_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB24_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB24_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB24_6 |
| ; VBITS_GE_256-NEXT: .LBB24_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB24_7 |
| ; VBITS_GE_256-NEXT: .LBB24_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB24_8 |
| ; VBITS_GE_256-NEXT: .LBB24_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB24_9 |
| ; VBITS_GE_256-NEXT: .LBB24_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB24_10 |
| ; VBITS_GE_256-NEXT: b .LBB24_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i32i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB24_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB24_3 |
| ; VBITS_GE_512-NEXT: b .LBB24_4 |
| ; VBITS_GE_512-NEXT: .LBB24_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI24_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI24_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB24_4 |
| ; VBITS_GE_512-NEXT: .LBB24_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB24_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB24_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB24_13 |
| ; VBITS_GE_512-NEXT: .LBB24_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB24_14 |
| ; VBITS_GE_512-NEXT: .LBB24_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB24_15 |
| ; VBITS_GE_512-NEXT: .LBB24_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB24_16 |
| ; VBITS_GE_512-NEXT: .LBB24_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB24_11 |
| ; VBITS_GE_512-NEXT: .LBB24_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB24_11: // %else26 |
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB24_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB24_6 |
| ; VBITS_GE_512-NEXT: .LBB24_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB24_7 |
| ; VBITS_GE_512-NEXT: .LBB24_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB24_8 |
| ; VBITS_GE_512-NEXT: .LBB24_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB24_9 |
| ; VBITS_GE_512-NEXT: .LBB24_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB24_10 |
| ; VBITS_GE_512-NEXT: b .LBB24_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <8 x i32>, ptr %bp |
| %mask = icmp eq <8 x i32> %b, zeroinitializer |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| %ext = zext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16_m16: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0 |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w8, v1.b[0] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[4] |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[1] |
| ; VBITS_GE_256-NEXT: umov w9, v1.b[7] |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[8] |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[9] |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[10] |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[2] |
| ; VBITS_GE_256-NEXT: umov w15, v1.b[3] |
| ; VBITS_GE_256-NEXT: umov w1, v1.b[4] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[6] |
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v1.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[7] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[12] |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[8] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[14] |
| ; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[9] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[10] |
| ; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[5] |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w14 |
| ; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[15] |
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[1] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w15 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[6] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[2] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[14] |
| ; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w1 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w18 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w9, w11, w10 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB25_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rb { z0.b }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB25_3 |
| ; VBITS_GE_256-NEXT: b .LBB25_4 |
| ; VBITS_GE_256-NEXT: .LBB25_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.b, vl32 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI25_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI25_0 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB25_4 |
| ; VBITS_GE_256-NEXT: .LBB25_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB25_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB25_36 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB25_37 |
| ; VBITS_GE_256-NEXT: .LBB25_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB25_38 |
| ; VBITS_GE_256-NEXT: .LBB25_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB25_39 |
| ; VBITS_GE_256-NEXT: .LBB25_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB25_40 |
| ; VBITS_GE_256-NEXT: .LBB25_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB25_41 |
| ; VBITS_GE_256-NEXT: .LBB25_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB25_42 |
| ; VBITS_GE_256-NEXT: .LBB25_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB25_43 |
| ; VBITS_GE_256-NEXT: .LBB25_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB25_44 |
| ; VBITS_GE_256-NEXT: .LBB25_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB25_45 |
| ; VBITS_GE_256-NEXT: .LBB25_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB25_46 |
| ; VBITS_GE_256-NEXT: .LBB25_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB25_47 |
| ; VBITS_GE_256-NEXT: .LBB25_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB25_48 |
| ; VBITS_GE_256-NEXT: .LBB25_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB25_49 |
| ; VBITS_GE_256-NEXT: .LBB25_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB25_50 |
| ; VBITS_GE_256-NEXT: .LBB25_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB25_51 |
| ; VBITS_GE_256-NEXT: .LBB25_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB25_52 |
| ; VBITS_GE_256-NEXT: .LBB25_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB25_53 |
| ; VBITS_GE_256-NEXT: .LBB25_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB25_54 |
| ; VBITS_GE_256-NEXT: .LBB25_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB25_55 |
| ; VBITS_GE_256-NEXT: .LBB25_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB25_56 |
| ; VBITS_GE_256-NEXT: .LBB25_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB25_57 |
| ; VBITS_GE_256-NEXT: .LBB25_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB25_58 |
| ; VBITS_GE_256-NEXT: .LBB25_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB25_59 |
| ; VBITS_GE_256-NEXT: .LBB25_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB25_60 |
| ; VBITS_GE_256-NEXT: .LBB25_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB25_61 |
| ; VBITS_GE_256-NEXT: .LBB25_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB25_62 |
| ; VBITS_GE_256-NEXT: .LBB25_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB25_63 |
| ; VBITS_GE_256-NEXT: .LBB25_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB25_64 |
| ; VBITS_GE_256-NEXT: .LBB25_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB25_35 |
| ; VBITS_GE_256-NEXT: .LBB25_34: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w8 |
| ; VBITS_GE_256-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB25_35: // %else122 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB25_36: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB25_6 |
| ; VBITS_GE_256-NEXT: .LBB25_37: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB25_7 |
| ; VBITS_GE_256-NEXT: .LBB25_38: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB25_8 |
| ; VBITS_GE_256-NEXT: .LBB25_39: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB25_9 |
| ; VBITS_GE_256-NEXT: .LBB25_40: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB25_10 |
| ; VBITS_GE_256-NEXT: .LBB25_41: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB25_11 |
| ; VBITS_GE_256-NEXT: .LBB25_42: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB25_12 |
| ; VBITS_GE_256-NEXT: .LBB25_43: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB25_13 |
| ; VBITS_GE_256-NEXT: .LBB25_44: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB25_14 |
| ; VBITS_GE_256-NEXT: .LBB25_45: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB25_15 |
| ; VBITS_GE_256-NEXT: .LBB25_46: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB25_16 |
| ; VBITS_GE_256-NEXT: .LBB25_47: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB25_17 |
| ; VBITS_GE_256-NEXT: .LBB25_48: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB25_18 |
| ; VBITS_GE_256-NEXT: .LBB25_49: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB25_19 |
| ; VBITS_GE_256-NEXT: .LBB25_50: // %cond.load61 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB25_20 |
| ; VBITS_GE_256-NEXT: .LBB25_51: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB25_21 |
| ; VBITS_GE_256-NEXT: .LBB25_52: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB25_22 |
| ; VBITS_GE_256-NEXT: .LBB25_53: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB25_23 |
| ; VBITS_GE_256-NEXT: .LBB25_54: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB25_24 |
| ; VBITS_GE_256-NEXT: .LBB25_55: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB25_25 |
| ; VBITS_GE_256-NEXT: .LBB25_56: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB25_26 |
| ; VBITS_GE_256-NEXT: .LBB25_57: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB25_27 |
| ; VBITS_GE_256-NEXT: .LBB25_58: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB25_28 |
| ; VBITS_GE_256-NEXT: .LBB25_59: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB25_29 |
| ; VBITS_GE_256-NEXT: .LBB25_60: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB25_30 |
| ; VBITS_GE_256-NEXT: .LBB25_61: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB25_31 |
| ; VBITS_GE_256-NEXT: .LBB25_62: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB25_32 |
| ; VBITS_GE_256-NEXT: .LBB25_63: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB25_33 |
| ; VBITS_GE_256-NEXT: .LBB25_64: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB25_34 |
| ; VBITS_GE_256-NEXT: b .LBB25_35 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v32i8i16_m16: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| ; VBITS_GE_512-NEXT: str x2, [sp] // 8-byte Spill |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.b |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[1] |
| ; VBITS_GE_512-NEXT: fmov w6, s0 |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_512-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[11] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: fmov w22, s7 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1 |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[12] |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[13] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w23, s16 |
| ; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w3 |
| ; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w24, s17 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w25, s18 |
| ; VBITS_GE_512-NEXT: orr w3, w5, w20 |
| ; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: fmov w26, s19 |
| ; VBITS_GE_512-NEXT: orr w13, w3, w4 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w16, w17, #12, #1 |
| ; VBITS_GE_512-NEXT: fmov w27, s20 |
| ; VBITS_GE_512-NEXT: ubfiz w17, w24, #22, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w18, #13, #1 |
| ; VBITS_GE_512-NEXT: fmov w28, s21 |
| ; VBITS_GE_512-NEXT: orr w13, w13, w3 |
| ; VBITS_GE_512-NEXT: ubfiz w18, w25, #23, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w11, w16 |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w29, s22 |
| ; VBITS_GE_512-NEXT: orr w11, w13, w17 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w26, #24, #1 |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w9, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1 |
| ; VBITS_GE_512-NEXT: fmov w7, s3 |
| ; VBITS_GE_512-NEXT: fmov w30, s23 |
| ; VBITS_GE_512-NEXT: orr w9, w10, w9 |
| ; VBITS_GE_512-NEXT: orr w10, w11, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1 |
| ; VBITS_GE_512-NEXT: umov w2, v0.b[6] |
| ; VBITS_GE_512-NEXT: fmov w19, s4 |
| ; VBITS_GE_512-NEXT: fmov w8, s24 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w15, #15, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w14 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_512-NEXT: fmov w11, s2 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w2, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w14 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w10, w8 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: orr w9, w6, w9 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w8 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB25_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB25_3 |
| ; VBITS_GE_512-NEXT: b .LBB25_4 |
| ; VBITS_GE_512-NEXT: .LBB25_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.b, vl32 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI25_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI25_0 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB25_4 |
| ; VBITS_GE_512-NEXT: .LBB25_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB25_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB25_36 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB25_37 |
| ; VBITS_GE_512-NEXT: .LBB25_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB25_38 |
| ; VBITS_GE_512-NEXT: .LBB25_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB25_39 |
| ; VBITS_GE_512-NEXT: .LBB25_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB25_40 |
| ; VBITS_GE_512-NEXT: .LBB25_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB25_41 |
| ; VBITS_GE_512-NEXT: .LBB25_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB25_42 |
| ; VBITS_GE_512-NEXT: .LBB25_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB25_43 |
| ; VBITS_GE_512-NEXT: .LBB25_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB25_44 |
| ; VBITS_GE_512-NEXT: .LBB25_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB25_45 |
| ; VBITS_GE_512-NEXT: .LBB25_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB25_46 |
| ; VBITS_GE_512-NEXT: .LBB25_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB25_47 |
| ; VBITS_GE_512-NEXT: .LBB25_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB25_48 |
| ; VBITS_GE_512-NEXT: .LBB25_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB25_49 |
| ; VBITS_GE_512-NEXT: .LBB25_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB25_50 |
| ; VBITS_GE_512-NEXT: .LBB25_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB25_51 |
| ; VBITS_GE_512-NEXT: .LBB25_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB25_52 |
| ; VBITS_GE_512-NEXT: .LBB25_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB25_53 |
| ; VBITS_GE_512-NEXT: .LBB25_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB25_54 |
| ; VBITS_GE_512-NEXT: .LBB25_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB25_55 |
| ; VBITS_GE_512-NEXT: .LBB25_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB25_56 |
| ; VBITS_GE_512-NEXT: .LBB25_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB25_57 |
| ; VBITS_GE_512-NEXT: .LBB25_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB25_58 |
| ; VBITS_GE_512-NEXT: .LBB25_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB25_59 |
| ; VBITS_GE_512-NEXT: .LBB25_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB25_60 |
| ; VBITS_GE_512-NEXT: .LBB25_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB25_61 |
| ; VBITS_GE_512-NEXT: .LBB25_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB25_62 |
| ; VBITS_GE_512-NEXT: .LBB25_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB25_63 |
| ; VBITS_GE_512-NEXT: .LBB25_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB25_64 |
| ; VBITS_GE_512-NEXT: .LBB25_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB25_35 |
| ; VBITS_GE_512-NEXT: .LBB25_34: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w8 |
| ; VBITS_GE_512-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB25_35: // %else122 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8] |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB25_36: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB25_6 |
| ; VBITS_GE_512-NEXT: .LBB25_37: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB25_7 |
| ; VBITS_GE_512-NEXT: .LBB25_38: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB25_8 |
| ; VBITS_GE_512-NEXT: .LBB25_39: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB25_9 |
| ; VBITS_GE_512-NEXT: .LBB25_40: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB25_10 |
| ; VBITS_GE_512-NEXT: .LBB25_41: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB25_11 |
| ; VBITS_GE_512-NEXT: .LBB25_42: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB25_12 |
| ; VBITS_GE_512-NEXT: .LBB25_43: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB25_13 |
| ; VBITS_GE_512-NEXT: .LBB25_44: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB25_14 |
| ; VBITS_GE_512-NEXT: .LBB25_45: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB25_15 |
| ; VBITS_GE_512-NEXT: .LBB25_46: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB25_16 |
| ; VBITS_GE_512-NEXT: .LBB25_47: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB25_17 |
| ; VBITS_GE_512-NEXT: .LBB25_48: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB25_18 |
| ; VBITS_GE_512-NEXT: .LBB25_49: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB25_19 |
| ; VBITS_GE_512-NEXT: .LBB25_50: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB25_20 |
| ; VBITS_GE_512-NEXT: .LBB25_51: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB25_21 |
| ; VBITS_GE_512-NEXT: .LBB25_52: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB25_22 |
| ; VBITS_GE_512-NEXT: .LBB25_53: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB25_23 |
| ; VBITS_GE_512-NEXT: .LBB25_54: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB25_24 |
| ; VBITS_GE_512-NEXT: .LBB25_55: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB25_25 |
| ; VBITS_GE_512-NEXT: .LBB25_56: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB25_26 |
| ; VBITS_GE_512-NEXT: .LBB25_57: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB25_27 |
| ; VBITS_GE_512-NEXT: .LBB25_58: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB25_28 |
| ; VBITS_GE_512-NEXT: .LBB25_59: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB25_29 |
| ; VBITS_GE_512-NEXT: .LBB25_60: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB25_30 |
| ; VBITS_GE_512-NEXT: .LBB25_61: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB25_31 |
| ; VBITS_GE_512-NEXT: .LBB25_62: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB25_32 |
| ; VBITS_GE_512-NEXT: .LBB25_63: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB25_33 |
| ; VBITS_GE_512-NEXT: .LBB25_64: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB25_34 |
| ; VBITS_GE_512-NEXT: b .LBB25_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i16_m16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z1.h, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z2.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: splice z0.b, p1, { z2.b, z3.b } |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl32 |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i16>, ptr %bp |
| %mask = icmp eq <32 x i16> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = sext <32 x i8> %load to <32 x i16> |
| store <32 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32_m32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI26_0 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w9, s0 |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB26_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB26_3 |
| ; VBITS_GE_256-NEXT: b .LBB26_4 |
| ; VBITS_GE_256-NEXT: .LBB26_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB26_4 |
| ; VBITS_GE_256-NEXT: .LBB26_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB26_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB26_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB26_21 |
| ; VBITS_GE_256-NEXT: .LBB26_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB26_22 |
| ; VBITS_GE_256-NEXT: .LBB26_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB26_23 |
| ; VBITS_GE_256-NEXT: .LBB26_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB26_24 |
| ; VBITS_GE_256-NEXT: .LBB26_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB26_25 |
| ; VBITS_GE_256-NEXT: .LBB26_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB26_26 |
| ; VBITS_GE_256-NEXT: .LBB26_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB26_27 |
| ; VBITS_GE_256-NEXT: .LBB26_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB26_28 |
| ; VBITS_GE_256-NEXT: .LBB26_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB26_29 |
| ; VBITS_GE_256-NEXT: .LBB26_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB26_30 |
| ; VBITS_GE_256-NEXT: .LBB26_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB26_31 |
| ; VBITS_GE_256-NEXT: .LBB26_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB26_32 |
| ; VBITS_GE_256-NEXT: .LBB26_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB26_19 |
| ; VBITS_GE_256-NEXT: .LBB26_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_256-NEXT: .LBB26_19: // %else58 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB26_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB26_6 |
| ; VBITS_GE_256-NEXT: .LBB26_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB26_7 |
| ; VBITS_GE_256-NEXT: .LBB26_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB26_8 |
| ; VBITS_GE_256-NEXT: .LBB26_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB26_9 |
| ; VBITS_GE_256-NEXT: .LBB26_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB26_10 |
| ; VBITS_GE_256-NEXT: .LBB26_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB26_11 |
| ; VBITS_GE_256-NEXT: .LBB26_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB26_12 |
| ; VBITS_GE_256-NEXT: .LBB26_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB26_13 |
| ; VBITS_GE_256-NEXT: .LBB26_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB26_14 |
| ; VBITS_GE_256-NEXT: .LBB26_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB26_15 |
| ; VBITS_GE_256-NEXT: .LBB26_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB26_16 |
| ; VBITS_GE_256-NEXT: .LBB26_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB26_17 |
| ; VBITS_GE_256-NEXT: .LBB26_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB26_18 |
| ; VBITS_GE_256-NEXT: b .LBB26_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v16i8i32_m32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB26_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB26_3 |
| ; VBITS_GE_512-NEXT: b .LBB26_4 |
| ; VBITS_GE_512-NEXT: .LBB26_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB26_4 |
| ; VBITS_GE_512-NEXT: .LBB26_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB26_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB26_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB26_21 |
| ; VBITS_GE_512-NEXT: .LBB26_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB26_22 |
| ; VBITS_GE_512-NEXT: .LBB26_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB26_23 |
| ; VBITS_GE_512-NEXT: .LBB26_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB26_24 |
| ; VBITS_GE_512-NEXT: .LBB26_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB26_25 |
| ; VBITS_GE_512-NEXT: .LBB26_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB26_26 |
| ; VBITS_GE_512-NEXT: .LBB26_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB26_27 |
| ; VBITS_GE_512-NEXT: .LBB26_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB26_28 |
| ; VBITS_GE_512-NEXT: .LBB26_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB26_29 |
| ; VBITS_GE_512-NEXT: .LBB26_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB26_30 |
| ; VBITS_GE_512-NEXT: .LBB26_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB26_31 |
| ; VBITS_GE_512-NEXT: .LBB26_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB26_32 |
| ; VBITS_GE_512-NEXT: .LBB26_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB26_19 |
| ; VBITS_GE_512-NEXT: .LBB26_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_512-NEXT: .LBB26_19: // %else58 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB26_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB26_6 |
| ; VBITS_GE_512-NEXT: .LBB26_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB26_7 |
| ; VBITS_GE_512-NEXT: .LBB26_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB26_8 |
| ; VBITS_GE_512-NEXT: .LBB26_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB26_9 |
| ; VBITS_GE_512-NEXT: .LBB26_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB26_10 |
| ; VBITS_GE_512-NEXT: .LBB26_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB26_11 |
| ; VBITS_GE_512-NEXT: .LBB26_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB26_12 |
| ; VBITS_GE_512-NEXT: .LBB26_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB26_13 |
| ; VBITS_GE_512-NEXT: .LBB26_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB26_14 |
| ; VBITS_GE_512-NEXT: .LBB26_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB26_15 |
| ; VBITS_GE_512-NEXT: .LBB26_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB26_16 |
| ; VBITS_GE_512-NEXT: .LBB26_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB26_17 |
| ; VBITS_GE_512-NEXT: .LBB26_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB26_18 |
| ; VBITS_GE_512-NEXT: b .LBB26_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v16i8i32_m32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0] |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z1.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <16 x i32>, ptr %bp |
| %mask = icmp eq <16 x i32> %b, zeroinitializer |
| %load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison) |
| %ext = sext <16 x i8> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding masked load of <8 x i8> whose result is sign-extended to <8 x i64> |
| ; and stored to %c. The mask comes from comparing a 512-bit <8 x i64> vector |
| ; loaded from %bp against zero. |
| ; Assertions under the VBITS_GE_256 and VBITS_GE_512 prefixes pack the mask into |
| ; w8, test each bit with tbz/tbnz and load one byte lane at a time with a |
| ; post-incremented pointer. Assertions under the CHECK-EXPAND prefix (the sve2p2 |
| ; RUN line) count the active lanes with cntp, load that many contiguous bytes |
| ; via whilelo + ld1b, and scatter them into place with a single expand. |
| define void @masked_load_sext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB27_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB27_3 |
| ; VBITS_GE_256-NEXT: b .LBB27_4 |
| ; VBITS_GE_256-NEXT: .LBB27_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB27_4 |
| ; VBITS_GE_256-NEXT: .LBB27_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB27_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB27_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB27_13 |
| ; VBITS_GE_256-NEXT: .LBB27_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB27_14 |
| ; VBITS_GE_256-NEXT: .LBB27_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB27_15 |
| ; VBITS_GE_256-NEXT: .LBB27_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB27_16 |
| ; VBITS_GE_256-NEXT: .LBB27_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB27_11 |
| ; VBITS_GE_256-NEXT: .LBB27_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB27_11: // %else26 |
| ; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB27_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB27_6 |
| ; VBITS_GE_256-NEXT: .LBB27_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB27_7 |
| ; VBITS_GE_256-NEXT: .LBB27_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB27_8 |
| ; VBITS_GE_256-NEXT: .LBB27_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB27_9 |
| ; VBITS_GE_256-NEXT: .LBB27_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB27_10 |
| ; VBITS_GE_256-NEXT: b .LBB27_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i8i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB27_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB27_3 |
| ; VBITS_GE_512-NEXT: b .LBB27_4 |
| ; VBITS_GE_512-NEXT: .LBB27_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB27_4 |
| ; VBITS_GE_512-NEXT: .LBB27_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB27_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB27_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB27_13 |
| ; VBITS_GE_512-NEXT: .LBB27_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB27_14 |
| ; VBITS_GE_512-NEXT: .LBB27_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB27_15 |
| ; VBITS_GE_512-NEXT: .LBB27_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB27_16 |
| ; VBITS_GE_512-NEXT: .LBB27_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB27_11 |
| ; VBITS_GE_512-NEXT: .LBB27_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB27_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB27_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB27_6 |
| ; VBITS_GE_512-NEXT: .LBB27_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB27_7 |
| ; VBITS_GE_512-NEXT: .LBB27_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB27_8 |
| ; VBITS_GE_512-NEXT: .LBB27_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB27_9 |
| ; VBITS_GE_512-NEXT: .LBB27_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB27_10 |
| ; VBITS_GE_512-NEXT: b .LBB27_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i8i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl8 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: sshll v0.8h, v0.8b, #0 |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask lane i is set when element i of the <8 x i64> vector at %bp equals zero. |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| ; The passthru operand is poison, so lanes with a clear mask bit carry no defined value. |
| %load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison) |
| %ext = sext <8 x i8> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32_m32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI28_0 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI28_0] |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w9, s0 |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB28_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rh { z0.h }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB28_3 |
| ; VBITS_GE_256-NEXT: b .LBB28_4 |
| ; VBITS_GE_256-NEXT: .LBB28_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.h, vl16 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI28_1 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI28_1 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB28_4 |
| ; VBITS_GE_256-NEXT: .LBB28_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB28_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB28_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB28_21 |
| ; VBITS_GE_256-NEXT: .LBB28_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB28_22 |
| ; VBITS_GE_256-NEXT: .LBB28_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB28_23 |
| ; VBITS_GE_256-NEXT: .LBB28_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB28_24 |
| ; VBITS_GE_256-NEXT: .LBB28_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB28_25 |
| ; VBITS_GE_256-NEXT: .LBB28_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB28_26 |
| ; VBITS_GE_256-NEXT: .LBB28_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB28_27 |
| ; VBITS_GE_256-NEXT: .LBB28_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB28_28 |
| ; VBITS_GE_256-NEXT: .LBB28_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB28_29 |
| ; VBITS_GE_256-NEXT: .LBB28_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB28_30 |
| ; VBITS_GE_256-NEXT: .LBB28_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB28_31 |
| ; VBITS_GE_256-NEXT: .LBB28_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB28_32 |
| ; VBITS_GE_256-NEXT: .LBB28_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB28_19 |
| ; VBITS_GE_256-NEXT: .LBB28_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w8 |
| ; VBITS_GE_256-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB28_19: // %else58 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB28_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB28_6 |
| ; VBITS_GE_256-NEXT: .LBB28_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB28_7 |
| ; VBITS_GE_256-NEXT: .LBB28_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB28_8 |
| ; VBITS_GE_256-NEXT: .LBB28_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB28_9 |
| ; VBITS_GE_256-NEXT: .LBB28_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB28_10 |
| ; VBITS_GE_256-NEXT: .LBB28_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB28_11 |
| ; VBITS_GE_256-NEXT: .LBB28_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB28_12 |
| ; VBITS_GE_256-NEXT: .LBB28_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB28_13 |
| ; VBITS_GE_256-NEXT: .LBB28_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB28_14 |
| ; VBITS_GE_256-NEXT: .LBB28_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB28_15 |
| ; VBITS_GE_256-NEXT: .LBB28_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB28_16 |
| ; VBITS_GE_256-NEXT: .LBB28_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB28_17 |
| ; VBITS_GE_256-NEXT: .LBB28_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB28_18 |
| ; VBITS_GE_256-NEXT: b .LBB28_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v16i16i32_m32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB28_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB28_3 |
| ; VBITS_GE_512-NEXT: b .LBB28_4 |
| ; VBITS_GE_512-NEXT: .LBB28_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.h, vl16 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI28_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI28_0 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB28_4 |
| ; VBITS_GE_512-NEXT: .LBB28_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB28_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB28_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB28_21 |
| ; VBITS_GE_512-NEXT: .LBB28_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB28_22 |
| ; VBITS_GE_512-NEXT: .LBB28_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB28_23 |
| ; VBITS_GE_512-NEXT: .LBB28_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB28_24 |
| ; VBITS_GE_512-NEXT: .LBB28_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB28_25 |
| ; VBITS_GE_512-NEXT: .LBB28_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB28_26 |
| ; VBITS_GE_512-NEXT: .LBB28_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB28_27 |
| ; VBITS_GE_512-NEXT: .LBB28_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB28_28 |
| ; VBITS_GE_512-NEXT: .LBB28_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB28_29 |
| ; VBITS_GE_512-NEXT: .LBB28_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB28_30 |
| ; VBITS_GE_512-NEXT: .LBB28_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB28_31 |
| ; VBITS_GE_512-NEXT: .LBB28_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB28_32 |
| ; VBITS_GE_512-NEXT: .LBB28_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB28_19 |
| ; VBITS_GE_512-NEXT: .LBB28_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w8 |
| ; VBITS_GE_512-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB28_19: // %else58 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB28_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB28_6 |
| ; VBITS_GE_512-NEXT: .LBB28_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB28_7 |
| ; VBITS_GE_512-NEXT: .LBB28_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB28_8 |
| ; VBITS_GE_512-NEXT: .LBB28_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB28_9 |
| ; VBITS_GE_512-NEXT: .LBB28_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB28_10 |
| ; VBITS_GE_512-NEXT: .LBB28_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB28_11 |
| ; VBITS_GE_512-NEXT: .LBB28_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB28_12 |
| ; VBITS_GE_512-NEXT: .LBB28_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB28_13 |
| ; VBITS_GE_512-NEXT: .LBB28_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB28_14 |
| ; VBITS_GE_512-NEXT: .LBB28_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB28_15 |
| ; VBITS_GE_512-NEXT: .LBB28_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB28_16 |
| ; VBITS_GE_512-NEXT: .LBB28_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB28_17 |
| ; VBITS_GE_512-NEXT: .LBB28_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB28_18 |
| ; VBITS_GE_512-NEXT: b .LBB28_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v16i16i32_m32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.h, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0] |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z1.b |
| ; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h |
| ; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <16 x i32>, ptr %bp |
| %mask = icmp eq <16 x i32> %b, zeroinitializer |
| %load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison) |
| %ext = sext <16 x i16> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i16> from %ap, sign-extended to <8 x i64> and stored to %c. |
| ; The <8 x i1> mask is derived from a 64-bit-element compare (the i64 vector at %bp |
| ; against zero), so the mask elements are wider than the loaded elements. |
| ; Despite the "masked_load" name, the IR calls @llvm.masked.expandload with a |
| ; poison passthru. |
| ; |
| ; Expected code, as recorded below: |
| ;  * 256-bit and 512-bit runs: the mask is packed into the low 8 bits of w8 and |
| ;    tested bit by bit with tbz/tbnz; each set bit does one halfword lane load |
| ;    from x0 with a post-increment of 2 (the last lane omits the increment). |
| ;    The 256-bit run builds the mask from two ld1d halves joined by splice and |
| ;    stores the result as two st1d halves; the 512-bit run uses one of each. |
| ;  * sve2p2 run: no branches; cntp / whilelo / ld1h / expand, then sunpklo and |
| ;    two st1d stores. |
| ; NOTE(review): the non-sve2p2 outputs adjust sp by 16 bytes although no checked |
| ; instruction addresses the stack; presumably a codegen artifact, confirm when |
| ; regenerating the assertions. |
| define void @masked_load_sext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB29_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB29_3 |
| ; VBITS_GE_256-NEXT: b .LBB29_4 |
| ; VBITS_GE_256-NEXT: .LBB29_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB29_4 |
| ; VBITS_GE_256-NEXT: .LBB29_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_256-NEXT: .LBB29_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB29_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB29_13 |
| ; VBITS_GE_256-NEXT: .LBB29_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB29_14 |
| ; VBITS_GE_256-NEXT: .LBB29_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB29_15 |
| ; VBITS_GE_256-NEXT: .LBB29_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB29_16 |
| ; VBITS_GE_256-NEXT: .LBB29_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB29_11 |
| ; VBITS_GE_256-NEXT: .LBB29_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB29_11: // %else26 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB29_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB29_6 |
| ; VBITS_GE_256-NEXT: .LBB29_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB29_7 |
| ; VBITS_GE_256-NEXT: .LBB29_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB29_8 |
| ; VBITS_GE_256-NEXT: .LBB29_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB29_9 |
| ; VBITS_GE_256-NEXT: .LBB29_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB29_10 |
| ; VBITS_GE_256-NEXT: b .LBB29_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i16i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB29_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB29_3 |
| ; VBITS_GE_512-NEXT: b .LBB29_4 |
| ; VBITS_GE_512-NEXT: .LBB29_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB29_4 |
| ; VBITS_GE_512-NEXT: .LBB29_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_512-NEXT: .LBB29_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB29_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB29_13 |
| ; VBITS_GE_512-NEXT: .LBB29_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB29_14 |
| ; VBITS_GE_512-NEXT: .LBB29_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB29_15 |
| ; VBITS_GE_512-NEXT: .LBB29_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB29_16 |
| ; VBITS_GE_512-NEXT: .LBB29_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB29_11 |
| ; VBITS_GE_512-NEXT: .LBB29_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB29_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB29_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB29_6 |
| ; VBITS_GE_512-NEXT: .LBB29_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB29_7 |
| ; VBITS_GE_512-NEXT: .LBB29_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB29_8 |
| ; VBITS_GE_512-NEXT: .LBB29_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB29_9 |
| ; VBITS_GE_512-NEXT: .LBB29_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB29_10 |
| ; VBITS_GE_512-NEXT: b .LBB29_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i16i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.h, vl8 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h |
| ; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask lane i is true when the i-th i64 read from %bp equals zero. |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| %load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison) |
| %ext = sext <8 x i16> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i32> from %ap, sign-extended to <8 x i64> and stored to %c. |
| ; The <8 x i1> mask comes from comparing the <8 x i64> vector at %bp against zero. |
| ; Despite the "masked_load" name, the IR calls @llvm.masked.expandload with a |
| ; poison passthru. |
| ; |
| ; Expected code, as recorded below: |
| ;  * 256-bit and 512-bit runs: the mask is packed into the low 8 bits of w8 and |
| ;    tested bit by bit with tbz/tbnz. Lane 0 is loaded with ld1rw (x0 then |
| ;    advanced by 4); when bit 0 is clear z0 is instead read from the constant |
| ;    pool entry .LCPI30_0. Each later lane is inserted by loading a word with |
| ;    a post-increment of 4 (none on the last lane) and merging it under a |
| ;    one-lane predicate built with index / mov / cmpeq. |
| ;  * sve2p2 run: no branches; cntp / whilelo / ld1w / expand, then sunpklo and |
| ;    two st1d stores. |
| ; NOTE(review): the non-sve2p2 outputs adjust sp by 16 bytes although no checked |
| ; instruction addresses the stack; presumably a codegen artifact, confirm when |
| ; regenerating the assertions. |
| define void @masked_load_sext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB30_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB30_3 |
| ; VBITS_GE_256-NEXT: b .LBB30_4 |
| ; VBITS_GE_256-NEXT: .LBB30_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.s, vl8 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI30_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI30_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB30_4 |
| ; VBITS_GE_256-NEXT: .LBB30_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB30_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB30_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB30_13 |
| ; VBITS_GE_256-NEXT: .LBB30_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB30_14 |
| ; VBITS_GE_256-NEXT: .LBB30_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB30_15 |
| ; VBITS_GE_256-NEXT: .LBB30_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB30_16 |
| ; VBITS_GE_256-NEXT: .LBB30_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB30_11 |
| ; VBITS_GE_256-NEXT: .LBB30_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB30_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB30_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB30_6 |
| ; VBITS_GE_256-NEXT: .LBB30_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB30_7 |
| ; VBITS_GE_256-NEXT: .LBB30_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB30_8 |
| ; VBITS_GE_256-NEXT: .LBB30_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB30_9 |
| ; VBITS_GE_256-NEXT: .LBB30_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB30_10 |
| ; VBITS_GE_256-NEXT: b .LBB30_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_v8i32i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB30_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB30_3 |
| ; VBITS_GE_512-NEXT: b .LBB30_4 |
| ; VBITS_GE_512-NEXT: .LBB30_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.s, vl8 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI30_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI30_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB30_4 |
| ; VBITS_GE_512-NEXT: .LBB30_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB30_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB30_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB30_13 |
| ; VBITS_GE_512-NEXT: .LBB30_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB30_14 |
| ; VBITS_GE_512-NEXT: .LBB30_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB30_15 |
| ; VBITS_GE_512-NEXT: .LBB30_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB30_16 |
| ; VBITS_GE_512-NEXT: .LBB30_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB30_11 |
| ; VBITS_GE_512-NEXT: .LBB30_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB30_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB30_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB30_6 |
| ; VBITS_GE_512-NEXT: .LBB30_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB30_7 |
| ; VBITS_GE_512-NEXT: .LBB30_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB30_8 |
| ; VBITS_GE_512-NEXT: .LBB30_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB30_9 |
| ; VBITS_GE_512-NEXT: .LBB30_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB30_10 |
| ; VBITS_GE_512-NEXT: b .LBB30_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v8i32i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl8 |
| ; CHECK-EXPAND-NEXT: cmpne p2.s, p1/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s |
| ; CHECK-EXPAND-NEXT: whilelo p1.s, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask lane i is true when the i-th i64 read from %bp equals zero. |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| %ext = sext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16_m16: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0 |
| ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b |
| ; VBITS_GE_256-NEXT: umov w8, v1.b[0] |
| ; VBITS_GE_256-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[4] |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[1] |
| ; VBITS_GE_256-NEXT: umov w9, v1.b[7] |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[8] |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[9] |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[10] |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[5] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[2] |
| ; VBITS_GE_256-NEXT: umov w15, v1.b[3] |
| ; VBITS_GE_256-NEXT: umov w1, v1.b[4] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[6] |
| ; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v1.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w14, v0.b[7] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[12] |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[8] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v1.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v1.b[14] |
| ; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[9] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[10] |
| ; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v1.b[5] |
| ; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w14 |
| ; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: umov w16, v1.b[15] |
| ; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_256-NEXT: umov w18, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w1, v0.b[11] |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w17 |
| ; VBITS_GE_256-NEXT: umov w17, v0.b[1] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w15 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_256-NEXT: umov w14, v1.b[6] |
| ; VBITS_GE_256-NEXT: umov w15, v0.b[2] |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1 |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[14] |
| ; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w1 |
| ; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w18 |
| ; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1 |
| ; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w9, w16 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w13 |
| ; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1 |
| ; VBITS_GE_256-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_256-NEXT: orr w9, w9, w14 |
| ; VBITS_GE_256-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w9, w11, w10 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31 |
| ; VBITS_GE_256-NEXT: tbz w8, #0, .LBB31_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rb { z0.b }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB31_3 |
| ; VBITS_GE_256-NEXT: b .LBB31_4 |
| ; VBITS_GE_256-NEXT: .LBB31_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.b, vl32 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI31_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI31_0 |
| ; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB31_4 |
| ; VBITS_GE_256-NEXT: .LBB31_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB31_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB31_36 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB31_37 |
| ; VBITS_GE_256-NEXT: .LBB31_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB31_38 |
| ; VBITS_GE_256-NEXT: .LBB31_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB31_39 |
| ; VBITS_GE_256-NEXT: .LBB31_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB31_40 |
| ; VBITS_GE_256-NEXT: .LBB31_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB31_41 |
| ; VBITS_GE_256-NEXT: .LBB31_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB31_42 |
| ; VBITS_GE_256-NEXT: .LBB31_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB31_43 |
| ; VBITS_GE_256-NEXT: .LBB31_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB31_44 |
| ; VBITS_GE_256-NEXT: .LBB31_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB31_45 |
| ; VBITS_GE_256-NEXT: .LBB31_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB31_46 |
| ; VBITS_GE_256-NEXT: .LBB31_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB31_47 |
| ; VBITS_GE_256-NEXT: .LBB31_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB31_48 |
| ; VBITS_GE_256-NEXT: .LBB31_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB31_49 |
| ; VBITS_GE_256-NEXT: .LBB31_18: // %else58 |
| ; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB31_50 |
| ; VBITS_GE_256-NEXT: .LBB31_19: // %else62 |
| ; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB31_51 |
| ; VBITS_GE_256-NEXT: .LBB31_20: // %else66 |
| ; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB31_52 |
| ; VBITS_GE_256-NEXT: .LBB31_21: // %else70 |
| ; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB31_53 |
| ; VBITS_GE_256-NEXT: .LBB31_22: // %else74 |
| ; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB31_54 |
| ; VBITS_GE_256-NEXT: .LBB31_23: // %else78 |
| ; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB31_55 |
| ; VBITS_GE_256-NEXT: .LBB31_24: // %else82 |
| ; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB31_56 |
| ; VBITS_GE_256-NEXT: .LBB31_25: // %else86 |
| ; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB31_57 |
| ; VBITS_GE_256-NEXT: .LBB31_26: // %else90 |
| ; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB31_58 |
| ; VBITS_GE_256-NEXT: .LBB31_27: // %else94 |
| ; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB31_59 |
| ; VBITS_GE_256-NEXT: .LBB31_28: // %else98 |
| ; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB31_60 |
| ; VBITS_GE_256-NEXT: .LBB31_29: // %else102 |
| ; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB31_61 |
| ; VBITS_GE_256-NEXT: .LBB31_30: // %else106 |
| ; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB31_62 |
| ; VBITS_GE_256-NEXT: .LBB31_31: // %else110 |
| ; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB31_63 |
| ; VBITS_GE_256-NEXT: .LBB31_32: // %else114 |
| ; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB31_64 |
| ; VBITS_GE_256-NEXT: .LBB31_33: // %else118 |
| ; VBITS_GE_256-NEXT: tbz w8, #31, .LBB31_35 |
| ; VBITS_GE_256-NEXT: .LBB31_34: // %cond.load121 |
| ; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w8 |
| ; VBITS_GE_256-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB31_35: // %else122 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB31_36: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB31_6 |
| ; VBITS_GE_256-NEXT: .LBB31_37: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB31_7 |
| ; VBITS_GE_256-NEXT: .LBB31_38: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB31_8 |
| ; VBITS_GE_256-NEXT: .LBB31_39: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB31_9 |
| ; VBITS_GE_256-NEXT: .LBB31_40: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB31_10 |
| ; VBITS_GE_256-NEXT: .LBB31_41: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB31_11 |
| ; VBITS_GE_256-NEXT: .LBB31_42: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB31_12 |
| ; VBITS_GE_256-NEXT: .LBB31_43: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB31_13 |
| ; VBITS_GE_256-NEXT: .LBB31_44: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB31_14 |
| ; VBITS_GE_256-NEXT: .LBB31_45: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB31_15 |
| ; VBITS_GE_256-NEXT: .LBB31_46: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB31_16 |
| ; VBITS_GE_256-NEXT: .LBB31_47: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB31_17 |
| ; VBITS_GE_256-NEXT: .LBB31_48: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB31_18 |
| ; VBITS_GE_256-NEXT: .LBB31_49: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #16, .LBB31_19 |
| ; VBITS_GE_256-NEXT: .LBB31_50: // %cond.load61 |
| ; VBITS_GE_256-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #17, .LBB31_20 |
| ; VBITS_GE_256-NEXT: .LBB31_51: // %cond.load65 |
| ; VBITS_GE_256-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #18, .LBB31_21 |
| ; VBITS_GE_256-NEXT: .LBB31_52: // %cond.load69 |
| ; VBITS_GE_256-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #19, .LBB31_22 |
| ; VBITS_GE_256-NEXT: .LBB31_53: // %cond.load73 |
| ; VBITS_GE_256-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #20, .LBB31_23 |
| ; VBITS_GE_256-NEXT: .LBB31_54: // %cond.load77 |
| ; VBITS_GE_256-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #21, .LBB31_24 |
| ; VBITS_GE_256-NEXT: .LBB31_55: // %cond.load81 |
| ; VBITS_GE_256-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #22, .LBB31_25 |
| ; VBITS_GE_256-NEXT: .LBB31_56: // %cond.load85 |
| ; VBITS_GE_256-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #23, .LBB31_26 |
| ; VBITS_GE_256-NEXT: .LBB31_57: // %cond.load89 |
| ; VBITS_GE_256-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #24, .LBB31_27 |
| ; VBITS_GE_256-NEXT: .LBB31_58: // %cond.load93 |
| ; VBITS_GE_256-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #25, .LBB31_28 |
| ; VBITS_GE_256-NEXT: .LBB31_59: // %cond.load97 |
| ; VBITS_GE_256-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #26, .LBB31_29 |
| ; VBITS_GE_256-NEXT: .LBB31_60: // %cond.load101 |
| ; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #27, .LBB31_30 |
| ; VBITS_GE_256-NEXT: .LBB31_61: // %cond.load105 |
| ; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #28, .LBB31_31 |
| ; VBITS_GE_256-NEXT: .LBB31_62: // %cond.load109 |
| ; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #29, .LBB31_32 |
| ; VBITS_GE_256-NEXT: .LBB31_63: // %cond.load113 |
| ; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #30, .LBB31_33 |
| ; VBITS_GE_256-NEXT: .LBB31_64: // %cond.load117 |
| ; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_256-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.b, w9 |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB31_34 |
| ; VBITS_GE_256-NEXT: b .LBB31_35 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v32i8i16_m16: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #112 |
| ; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112 |
| ; VBITS_GE_512-NEXT: .cfi_offset w19, -8 |
| ; VBITS_GE_512-NEXT: .cfi_offset w20, -16 |
| ; VBITS_GE_512-NEXT: .cfi_offset w21, -24 |
| ; VBITS_GE_512-NEXT: .cfi_offset w22, -32 |
| ; VBITS_GE_512-NEXT: .cfi_offset w23, -40 |
| ; VBITS_GE_512-NEXT: .cfi_offset w24, -48 |
| ; VBITS_GE_512-NEXT: .cfi_offset w25, -56 |
| ; VBITS_GE_512-NEXT: .cfi_offset w26, -64 |
| ; VBITS_GE_512-NEXT: .cfi_offset w27, -72 |
| ; VBITS_GE_512-NEXT: .cfi_offset w28, -80 |
| ; VBITS_GE_512-NEXT: .cfi_offset w30, -88 |
| ; VBITS_GE_512-NEXT: .cfi_offset w29, -96 |
| ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| ; VBITS_GE_512-NEXT: str x2, [sp] // 8-byte Spill |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.b |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[1] |
| ; VBITS_GE_512-NEXT: fmov w6, s0 |
| ; VBITS_GE_512-NEXT: umov w3, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w5, v0.b[8] |
| ; VBITS_GE_512-NEXT: mov z5.b, z0.b[18] |
| ; VBITS_GE_512-NEXT: mov z6.b, z0.b[19] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w4, v0.b[9] |
| ; VBITS_GE_512-NEXT: mov z7.b, z0.b[20] |
| ; VBITS_GE_512-NEXT: umov w1, v0.b[10] |
| ; VBITS_GE_512-NEXT: and w6, w6, #0x1 |
| ; VBITS_GE_512-NEXT: mov z16.b, z0.b[21] |
| ; VBITS_GE_512-NEXT: fmov w20, s5 |
| ; VBITS_GE_512-NEXT: fmov w21, s6 |
| ; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[11] |
| ; VBITS_GE_512-NEXT: mov z17.b, z0.b[22] |
| ; VBITS_GE_512-NEXT: fmov w22, s7 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1 |
| ; VBITS_GE_512-NEXT: umov w17, v0.b[12] |
| ; VBITS_GE_512-NEXT: mov z18.b, z0.b[23] |
| ; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1 |
| ; VBITS_GE_512-NEXT: umov w18, v0.b[13] |
| ; VBITS_GE_512-NEXT: mov z19.b, z0.b[24] |
| ; VBITS_GE_512-NEXT: fmov w23, s16 |
| ; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w3 |
| ; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: mov z20.b, z0.b[25] |
| ; VBITS_GE_512-NEXT: fmov w24, s17 |
| ; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1 |
| ; VBITS_GE_512-NEXT: orr w12, w12, w13 |
| ; VBITS_GE_512-NEXT: mov z21.b, z0.b[26] |
| ; VBITS_GE_512-NEXT: fmov w25, s18 |
| ; VBITS_GE_512-NEXT: orr w3, w5, w20 |
| ; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w12, w1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: mov z22.b, z0.b[27] |
| ; VBITS_GE_512-NEXT: fmov w26, s19 |
| ; VBITS_GE_512-NEXT: orr w13, w3, w4 |
| ; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w16, w17, #12, #1 |
| ; VBITS_GE_512-NEXT: fmov w27, s20 |
| ; VBITS_GE_512-NEXT: ubfiz w17, w24, #22, #1 |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w18, #13, #1 |
| ; VBITS_GE_512-NEXT: fmov w28, s21 |
| ; VBITS_GE_512-NEXT: orr w13, w13, w3 |
| ; VBITS_GE_512-NEXT: ubfiz w18, w25, #23, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w11, w16 |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[15] |
| ; VBITS_GE_512-NEXT: mov z3.b, z0.b[16] |
| ; VBITS_GE_512-NEXT: mov z23.b, z0.b[28] |
| ; VBITS_GE_512-NEXT: fmov w29, s22 |
| ; VBITS_GE_512-NEXT: orr w11, w13, w17 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w26, #24, #1 |
| ; VBITS_GE_512-NEXT: mov z4.b, z0.b[17] |
| ; VBITS_GE_512-NEXT: mov z24.b, z0.b[29] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w18 |
| ; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w9, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, z0.b[30] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1 |
| ; VBITS_GE_512-NEXT: fmov w7, s3 |
| ; VBITS_GE_512-NEXT: fmov w30, s23 |
| ; VBITS_GE_512-NEXT: orr w9, w10, w9 |
| ; VBITS_GE_512-NEXT: orr w10, w11, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1 |
| ; VBITS_GE_512-NEXT: umov w2, v0.b[6] |
| ; VBITS_GE_512-NEXT: fmov w19, s4 |
| ; VBITS_GE_512-NEXT: fmov w8, s24 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w15, #15, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w14 |
| ; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1 |
| ; VBITS_GE_512-NEXT: mov z1.b, z0.b[31] |
| ; VBITS_GE_512-NEXT: orr w10, w10, w11 |
| ; VBITS_GE_512-NEXT: fmov w11, s2 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1 |
| ; VBITS_GE_512-NEXT: bfi w6, w2, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w10, w10, w14 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1 |
| ; VBITS_GE_512-NEXT: orr w8, w10, w8 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: orr w9, w6, w9 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w11 |
| ; VBITS_GE_512-NEXT: orr w8, w9, w8 |
| ; VBITS_GE_512-NEXT: fmov w9, s1 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31 |
| ; VBITS_GE_512-NEXT: tbz w8, #0, .LBB31_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB31_3 |
| ; VBITS_GE_512-NEXT: b .LBB31_4 |
| ; VBITS_GE_512-NEXT: .LBB31_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.b, vl32 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI31_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI31_0 |
| ; VBITS_GE_512-NEXT: ld1b { z0.b }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB31_4 |
| ; VBITS_GE_512-NEXT: .LBB31_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB31_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB31_36 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB31_37 |
| ; VBITS_GE_512-NEXT: .LBB31_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB31_38 |
| ; VBITS_GE_512-NEXT: .LBB31_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB31_39 |
| ; VBITS_GE_512-NEXT: .LBB31_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB31_40 |
| ; VBITS_GE_512-NEXT: .LBB31_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB31_41 |
| ; VBITS_GE_512-NEXT: .LBB31_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB31_42 |
| ; VBITS_GE_512-NEXT: .LBB31_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB31_43 |
| ; VBITS_GE_512-NEXT: .LBB31_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB31_44 |
| ; VBITS_GE_512-NEXT: .LBB31_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB31_45 |
| ; VBITS_GE_512-NEXT: .LBB31_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB31_46 |
| ; VBITS_GE_512-NEXT: .LBB31_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB31_47 |
| ; VBITS_GE_512-NEXT: .LBB31_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB31_48 |
| ; VBITS_GE_512-NEXT: .LBB31_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB31_49 |
| ; VBITS_GE_512-NEXT: .LBB31_18: // %else58 |
| ; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB31_50 |
| ; VBITS_GE_512-NEXT: .LBB31_19: // %else62 |
| ; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB31_51 |
| ; VBITS_GE_512-NEXT: .LBB31_20: // %else66 |
| ; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB31_52 |
| ; VBITS_GE_512-NEXT: .LBB31_21: // %else70 |
| ; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB31_53 |
| ; VBITS_GE_512-NEXT: .LBB31_22: // %else74 |
| ; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB31_54 |
| ; VBITS_GE_512-NEXT: .LBB31_23: // %else78 |
| ; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB31_55 |
| ; VBITS_GE_512-NEXT: .LBB31_24: // %else82 |
| ; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB31_56 |
| ; VBITS_GE_512-NEXT: .LBB31_25: // %else86 |
| ; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB31_57 |
| ; VBITS_GE_512-NEXT: .LBB31_26: // %else90 |
| ; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB31_58 |
| ; VBITS_GE_512-NEXT: .LBB31_27: // %else94 |
| ; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB31_59 |
| ; VBITS_GE_512-NEXT: .LBB31_28: // %else98 |
| ; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB31_60 |
| ; VBITS_GE_512-NEXT: .LBB31_29: // %else102 |
| ; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB31_61 |
| ; VBITS_GE_512-NEXT: .LBB31_30: // %else106 |
| ; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB31_62 |
| ; VBITS_GE_512-NEXT: .LBB31_31: // %else110 |
| ; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB31_63 |
| ; VBITS_GE_512-NEXT: .LBB31_32: // %else114 |
| ; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB31_64 |
| ; VBITS_GE_512-NEXT: .LBB31_33: // %else118 |
| ; VBITS_GE_512-NEXT: tbz w8, #31, .LBB31_35 |
| ; VBITS_GE_512-NEXT: .LBB31_34: // %cond.load121 |
| ; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w8 |
| ; VBITS_GE_512-NEXT: ldrb w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB31_35: // %else122 |
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8] |
| ; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; VBITS_GE_512-NEXT: add sp, sp, #112 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB31_36: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB31_6 |
| ; VBITS_GE_512-NEXT: .LBB31_37: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB31_7 |
| ; VBITS_GE_512-NEXT: .LBB31_38: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB31_8 |
| ; VBITS_GE_512-NEXT: .LBB31_39: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB31_9 |
| ; VBITS_GE_512-NEXT: .LBB31_40: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB31_10 |
| ; VBITS_GE_512-NEXT: .LBB31_41: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB31_11 |
| ; VBITS_GE_512-NEXT: .LBB31_42: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB31_12 |
| ; VBITS_GE_512-NEXT: .LBB31_43: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB31_13 |
| ; VBITS_GE_512-NEXT: .LBB31_44: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB31_14 |
| ; VBITS_GE_512-NEXT: .LBB31_45: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB31_15 |
| ; VBITS_GE_512-NEXT: .LBB31_46: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB31_16 |
| ; VBITS_GE_512-NEXT: .LBB31_47: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB31_17 |
| ; VBITS_GE_512-NEXT: .LBB31_48: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB31_18 |
| ; VBITS_GE_512-NEXT: .LBB31_49: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w9, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #16, .LBB31_19 |
| ; VBITS_GE_512-NEXT: .LBB31_50: // %cond.load61 |
| ; VBITS_GE_512-NEXT: mov w9, #16 // =0x10 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #17, .LBB31_20 |
| ; VBITS_GE_512-NEXT: .LBB31_51: // %cond.load65 |
| ; VBITS_GE_512-NEXT: mov w9, #17 // =0x11 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #18, .LBB31_21 |
| ; VBITS_GE_512-NEXT: .LBB31_52: // %cond.load69 |
| ; VBITS_GE_512-NEXT: mov w9, #18 // =0x12 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #19, .LBB31_22 |
| ; VBITS_GE_512-NEXT: .LBB31_53: // %cond.load73 |
| ; VBITS_GE_512-NEXT: mov w9, #19 // =0x13 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #20, .LBB31_23 |
| ; VBITS_GE_512-NEXT: .LBB31_54: // %cond.load77 |
| ; VBITS_GE_512-NEXT: mov w9, #20 // =0x14 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #21, .LBB31_24 |
| ; VBITS_GE_512-NEXT: .LBB31_55: // %cond.load81 |
| ; VBITS_GE_512-NEXT: mov w9, #21 // =0x15 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #22, .LBB31_25 |
| ; VBITS_GE_512-NEXT: .LBB31_56: // %cond.load85 |
| ; VBITS_GE_512-NEXT: mov w9, #22 // =0x16 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #23, .LBB31_26 |
| ; VBITS_GE_512-NEXT: .LBB31_57: // %cond.load89 |
| ; VBITS_GE_512-NEXT: mov w9, #23 // =0x17 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #24, .LBB31_27 |
| ; VBITS_GE_512-NEXT: .LBB31_58: // %cond.load93 |
| ; VBITS_GE_512-NEXT: mov w9, #24 // =0x18 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #25, .LBB31_28 |
| ; VBITS_GE_512-NEXT: .LBB31_59: // %cond.load97 |
| ; VBITS_GE_512-NEXT: mov w9, #25 // =0x19 |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #26, .LBB31_29 |
| ; VBITS_GE_512-NEXT: .LBB31_60: // %cond.load101 |
| ; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #27, .LBB31_30 |
| ; VBITS_GE_512-NEXT: .LBB31_61: // %cond.load105 |
| ; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #28, .LBB31_31 |
| ; VBITS_GE_512-NEXT: .LBB31_62: // %cond.load109 |
| ; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #29, .LBB31_32 |
| ; VBITS_GE_512-NEXT: .LBB31_63: // %cond.load113 |
| ; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #30, .LBB31_33 |
| ; VBITS_GE_512-NEXT: .LBB31_64: // %cond.load117 |
| ; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e |
| ; VBITS_GE_512-NEXT: index z1.b, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.b, w9 |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b |
| ; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB31_34 |
| ; VBITS_GE_512-NEXT: b .LBB31_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i16_m16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl16 |
| ; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z1.h, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z2.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: splice z0.b, p1, { z2.b, z3.b } |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl32 |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i16>, ptr %bp |
| %mask = icmp eq <32 x i16> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = zext <32 x i8> %load to <32 x i16> |
| store <32 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <16 x i8> from %ap, zero-extended to <16 x i32> and stored to %c. |
| ; A lane is active where the corresponding i32 element loaded from %bp is zero; |
| ; the pass-through operand is poison. |
| ; Expected codegen (RUN lines are at the top of the file): |
| ;  - VBITS_GE_256 / VBITS_GE_512 runs: the mask is reduced to a 16-bit scalar in w8, |
| ;    then each lane is loaded behind a tbz/tbnz test of its mask bit. The 256-bit |
| ;    run splits the i32 compare and the final store into two vl8 halves. |
| ;  - CHECK-EXPAND run (-mattr=sve2p2): one ld1b predicated on whilelo(0, cntp(mask)), |
| ;    followed by an expand instruction using the mask predicate. |
| define void @masked_load_zext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32_m32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI32_0 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w9, s0 |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB32_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB32_3 |
| ; VBITS_GE_256-NEXT: b .LBB32_4 |
| ; VBITS_GE_256-NEXT: .LBB32_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB32_4 |
| ; VBITS_GE_256-NEXT: .LBB32_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB32_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB32_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB32_21 |
| ; VBITS_GE_256-NEXT: .LBB32_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB32_22 |
| ; VBITS_GE_256-NEXT: .LBB32_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB32_23 |
| ; VBITS_GE_256-NEXT: .LBB32_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB32_24 |
| ; VBITS_GE_256-NEXT: .LBB32_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB32_25 |
| ; VBITS_GE_256-NEXT: .LBB32_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB32_26 |
| ; VBITS_GE_256-NEXT: .LBB32_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB32_27 |
| ; VBITS_GE_256-NEXT: .LBB32_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB32_28 |
| ; VBITS_GE_256-NEXT: .LBB32_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB32_29 |
| ; VBITS_GE_256-NEXT: .LBB32_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB32_30 |
| ; VBITS_GE_256-NEXT: .LBB32_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB32_31 |
| ; VBITS_GE_256-NEXT: .LBB32_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB32_32 |
| ; VBITS_GE_256-NEXT: .LBB32_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB32_19 |
| ; VBITS_GE_256-NEXT: .LBB32_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_256-NEXT: .LBB32_19: // %else58 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b |
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB32_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB32_6 |
| ; VBITS_GE_256-NEXT: .LBB32_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB32_7 |
| ; VBITS_GE_256-NEXT: .LBB32_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB32_8 |
| ; VBITS_GE_256-NEXT: .LBB32_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB32_9 |
| ; VBITS_GE_256-NEXT: .LBB32_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB32_10 |
| ; VBITS_GE_256-NEXT: .LBB32_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB32_11 |
| ; VBITS_GE_256-NEXT: .LBB32_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB32_12 |
| ; VBITS_GE_256-NEXT: .LBB32_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB32_13 |
| ; VBITS_GE_256-NEXT: .LBB32_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB32_14 |
| ; VBITS_GE_256-NEXT: .LBB32_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB32_15 |
| ; VBITS_GE_256-NEXT: .LBB32_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB32_16 |
| ; VBITS_GE_256-NEXT: .LBB32_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB32_17 |
| ; VBITS_GE_256-NEXT: .LBB32_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB32_18 |
| ; VBITS_GE_256-NEXT: b .LBB32_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v16i8i32_m32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB32_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB32_3 |
| ; VBITS_GE_512-NEXT: b .LBB32_4 |
| ; VBITS_GE_512-NEXT: .LBB32_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB32_4 |
| ; VBITS_GE_512-NEXT: .LBB32_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB32_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB32_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB32_21 |
| ; VBITS_GE_512-NEXT: .LBB32_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB32_22 |
| ; VBITS_GE_512-NEXT: .LBB32_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB32_23 |
| ; VBITS_GE_512-NEXT: .LBB32_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB32_24 |
| ; VBITS_GE_512-NEXT: .LBB32_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB32_25 |
| ; VBITS_GE_512-NEXT: .LBB32_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB32_26 |
| ; VBITS_GE_512-NEXT: .LBB32_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB32_27 |
| ; VBITS_GE_512-NEXT: .LBB32_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB32_28 |
| ; VBITS_GE_512-NEXT: .LBB32_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB32_29 |
| ; VBITS_GE_512-NEXT: .LBB32_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB32_30 |
| ; VBITS_GE_512-NEXT: .LBB32_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB32_31 |
| ; VBITS_GE_512-NEXT: .LBB32_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB32_32 |
| ; VBITS_GE_512-NEXT: .LBB32_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB32_19 |
| ; VBITS_GE_512-NEXT: .LBB32_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0] |
| ; VBITS_GE_512-NEXT: .LBB32_19: // %else58 |
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB32_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB32_6 |
| ; VBITS_GE_512-NEXT: .LBB32_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB32_7 |
| ; VBITS_GE_512-NEXT: .LBB32_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB32_8 |
| ; VBITS_GE_512-NEXT: .LBB32_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB32_9 |
| ; VBITS_GE_512-NEXT: .LBB32_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB32_10 |
| ; VBITS_GE_512-NEXT: .LBB32_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB32_11 |
| ; VBITS_GE_512-NEXT: .LBB32_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB32_12 |
| ; VBITS_GE_512-NEXT: .LBB32_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB32_13 |
| ; VBITS_GE_512-NEXT: .LBB32_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB32_14 |
| ; VBITS_GE_512-NEXT: .LBB32_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB32_15 |
| ; VBITS_GE_512-NEXT: .LBB32_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB32_16 |
| ; VBITS_GE_512-NEXT: .LBB32_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB32_17 |
| ; VBITS_GE_512-NEXT: .LBB32_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB32_18 |
| ; VBITS_GE_512-NEXT: b .LBB32_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v16i8i32_m32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0] |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z1.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b |
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask source: the mask is derived from the wider (i32) vector at %bp. |
| %b = load <16 x i32>, ptr %bp |
| ; Lane i is active iff %b[i] == 0. |
| %mask = icmp eq <16 x i32> %b, zeroinitializer |
| ; Expanding load from %ap under %mask, with a poison pass-through. |
| %load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison) |
| ; Widen i8 -> i32 so the stored result matches the mask element width. |
| %ext = zext <16 x i8> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i8> from %ap, zero-extended to <8 x i64> and stored to %c. |
| ; A lane is active where the corresponding i64 element loaded from %bp is zero; |
| ; the pass-through operand is poison. |
| ; Expected codegen (RUN lines are at the top of the file): |
| ;  - VBITS_GE_256 / VBITS_GE_512 runs: the mask is packed into w8 with umov/bfi, |
| ;    then each lane is loaded behind a tbz/tbnz test of its mask bit. The 256-bit |
| ;    run splits the i64 compare and the final store into two vl4 halves. |
| ;  - CHECK-EXPAND run (-mattr=sve2p2): one ld1b predicated on whilelo(0, cntp(mask)), |
| ;    followed by an expand instruction using the mask predicate. |
| define void @masked_load_zext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB33_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB33_3 |
| ; VBITS_GE_256-NEXT: b .LBB33_4 |
| ; VBITS_GE_256-NEXT: .LBB33_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB33_4 |
| ; VBITS_GE_256-NEXT: .LBB33_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_256-NEXT: .LBB33_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB33_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB33_13 |
| ; VBITS_GE_256-NEXT: .LBB33_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB33_14 |
| ; VBITS_GE_256-NEXT: .LBB33_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB33_15 |
| ; VBITS_GE_256-NEXT: .LBB33_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB33_16 |
| ; VBITS_GE_256-NEXT: .LBB33_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB33_11 |
| ; VBITS_GE_256-NEXT: .LBB33_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB33_11: // %else26 |
| ; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB33_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB33_6 |
| ; VBITS_GE_256-NEXT: .LBB33_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB33_7 |
| ; VBITS_GE_256-NEXT: .LBB33_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB33_8 |
| ; VBITS_GE_256-NEXT: .LBB33_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB33_9 |
| ; VBITS_GE_256-NEXT: .LBB33_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB33_10 |
| ; VBITS_GE_256-NEXT: b .LBB33_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i8i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB33_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrb w9, [x0], #1 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB33_3 |
| ; VBITS_GE_512-NEXT: b .LBB33_4 |
| ; VBITS_GE_512-NEXT: .LBB33_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $d0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB33_4 |
| ; VBITS_GE_512-NEXT: .LBB33_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1 |
| ; VBITS_GE_512-NEXT: .LBB33_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB33_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB33_13 |
| ; VBITS_GE_512-NEXT: .LBB33_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB33_14 |
| ; VBITS_GE_512-NEXT: .LBB33_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB33_15 |
| ; VBITS_GE_512-NEXT: .LBB33_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB33_16 |
| ; VBITS_GE_512-NEXT: .LBB33_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB33_11 |
| ; VBITS_GE_512-NEXT: .LBB33_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB33_11: // %else26 |
| ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b |
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB33_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB33_6 |
| ; VBITS_GE_512-NEXT: .LBB33_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB33_7 |
| ; VBITS_GE_512-NEXT: .LBB33_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB33_8 |
| ; VBITS_GE_512-NEXT: .LBB33_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB33_9 |
| ; VBITS_GE_512-NEXT: .LBB33_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB33_10 |
| ; VBITS_GE_512-NEXT: b .LBB33_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i8i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.b, vl8 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b |
| ; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b |
| ; CHECK-EXPAND-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask source: the mask is derived from the wider (i64) vector at %bp. |
| %b = load <8 x i64>, ptr %bp |
| ; Lane i is active iff %b[i] == 0. |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| ; Expanding load from %ap under %mask, with a poison pass-through. |
| %load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison) |
| ; Widen i8 -> i64 so the stored result matches the mask element width. |
| %ext = zext <8 x i8> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32_m32: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: adrp x8, .LCPI34_0 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] |
| ; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI34_0] |
| ; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b |
| ; VBITS_GE_256-NEXT: addv h0, v0.8h |
| ; VBITS_GE_256-NEXT: fmov w9, s0 |
| ; VBITS_GE_256-NEXT: fmov w8, s0 |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB34_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rh { z0.h }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB34_3 |
| ; VBITS_GE_256-NEXT: b .LBB34_4 |
| ; VBITS_GE_256-NEXT: .LBB34_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.h, vl16 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI34_1 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI34_1 |
| ; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB34_4 |
| ; VBITS_GE_256-NEXT: .LBB34_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB34_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB34_20 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB34_21 |
| ; VBITS_GE_256-NEXT: .LBB34_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB34_22 |
| ; VBITS_GE_256-NEXT: .LBB34_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB34_23 |
| ; VBITS_GE_256-NEXT: .LBB34_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB34_24 |
| ; VBITS_GE_256-NEXT: .LBB34_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB34_25 |
| ; VBITS_GE_256-NEXT: .LBB34_10: // %else26 |
| ; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB34_26 |
| ; VBITS_GE_256-NEXT: .LBB34_11: // %else30 |
| ; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB34_27 |
| ; VBITS_GE_256-NEXT: .LBB34_12: // %else34 |
| ; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB34_28 |
| ; VBITS_GE_256-NEXT: .LBB34_13: // %else38 |
| ; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB34_29 |
| ; VBITS_GE_256-NEXT: .LBB34_14: // %else42 |
| ; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB34_30 |
| ; VBITS_GE_256-NEXT: .LBB34_15: // %else46 |
| ; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB34_31 |
| ; VBITS_GE_256-NEXT: .LBB34_16: // %else50 |
| ; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB34_32 |
| ; VBITS_GE_256-NEXT: .LBB34_17: // %else54 |
| ; VBITS_GE_256-NEXT: tbz w8, #15, .LBB34_19 |
| ; VBITS_GE_256-NEXT: .LBB34_18: // %cond.load57 |
| ; VBITS_GE_256-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w8 |
| ; VBITS_GE_256-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB34_19: // %else58 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB34_20: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB34_6 |
| ; VBITS_GE_256-NEXT: .LBB34_21: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB34_7 |
| ; VBITS_GE_256-NEXT: .LBB34_22: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB34_8 |
| ; VBITS_GE_256-NEXT: .LBB34_23: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB34_9 |
| ; VBITS_GE_256-NEXT: .LBB34_24: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB34_10 |
| ; VBITS_GE_256-NEXT: .LBB34_25: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #8, .LBB34_11 |
| ; VBITS_GE_256-NEXT: .LBB34_26: // %cond.load29 |
| ; VBITS_GE_256-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #9, .LBB34_12 |
| ; VBITS_GE_256-NEXT: .LBB34_27: // %cond.load33 |
| ; VBITS_GE_256-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #10, .LBB34_13 |
| ; VBITS_GE_256-NEXT: .LBB34_28: // %cond.load37 |
| ; VBITS_GE_256-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #11, .LBB34_14 |
| ; VBITS_GE_256-NEXT: .LBB34_29: // %cond.load41 |
| ; VBITS_GE_256-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #12, .LBB34_15 |
| ; VBITS_GE_256-NEXT: .LBB34_30: // %cond.load45 |
| ; VBITS_GE_256-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #13, .LBB34_16 |
| ; VBITS_GE_256-NEXT: .LBB34_31: // %cond.load49 |
| ; VBITS_GE_256-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #14, .LBB34_17 |
| ; VBITS_GE_256-NEXT: .LBB34_32: // %cond.load53 |
| ; VBITS_GE_256-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_256-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.h, w9 |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB34_18 |
| ; VBITS_GE_256-NEXT: b .LBB34_19 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v16i16i32_m32: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[7] |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[8] |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[3] |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[4] |
| ; VBITS_GE_512-NEXT: umov w15, v0.b[10] |
| ; VBITS_GE_512-NEXT: umov w16, v0.b[5] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[9] |
| ; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1 |
| ; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[11] |
| ; VBITS_GE_512-NEXT: orr w11, w11, w12 |
| ; VBITS_GE_512-NEXT: umov w12, v0.b[13] |
| ; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[12] |
| ; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w14, v0.b[14] |
| ; VBITS_GE_512-NEXT: orr w9, w11, w9 |
| ; VBITS_GE_512-NEXT: umov w11, v0.b[6] |
| ; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w15 |
| ; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1 |
| ; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w13 |
| ; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1 |
| ; VBITS_GE_512-NEXT: umov w13, v0.b[15] |
| ; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w10 |
| ; VBITS_GE_512-NEXT: orr w9, w9, w12 |
| ; VBITS_GE_512-NEXT: orr w8, w8, w9 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xffff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB34_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB34_3 |
| ; VBITS_GE_512-NEXT: b .LBB34_4 |
| ; VBITS_GE_512-NEXT: .LBB34_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.h, vl16 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI34_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI34_0 |
| ; VBITS_GE_512-NEXT: ld1h { z0.h }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB34_4 |
| ; VBITS_GE_512-NEXT: .LBB34_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB34_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB34_20 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB34_21 |
| ; VBITS_GE_512-NEXT: .LBB34_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB34_22 |
| ; VBITS_GE_512-NEXT: .LBB34_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB34_23 |
| ; VBITS_GE_512-NEXT: .LBB34_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB34_24 |
| ; VBITS_GE_512-NEXT: .LBB34_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB34_25 |
| ; VBITS_GE_512-NEXT: .LBB34_10: // %else26 |
| ; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB34_26 |
| ; VBITS_GE_512-NEXT: .LBB34_11: // %else30 |
| ; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB34_27 |
| ; VBITS_GE_512-NEXT: .LBB34_12: // %else34 |
| ; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB34_28 |
| ; VBITS_GE_512-NEXT: .LBB34_13: // %else38 |
| ; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB34_29 |
| ; VBITS_GE_512-NEXT: .LBB34_14: // %else42 |
| ; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB34_30 |
| ; VBITS_GE_512-NEXT: .LBB34_15: // %else46 |
| ; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB34_31 |
| ; VBITS_GE_512-NEXT: .LBB34_16: // %else50 |
| ; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB34_32 |
| ; VBITS_GE_512-NEXT: .LBB34_17: // %else54 |
| ; VBITS_GE_512-NEXT: tbz w8, #15, .LBB34_19 |
| ; VBITS_GE_512-NEXT: .LBB34_18: // %cond.load57 |
| ; VBITS_GE_512-NEXT: mov w8, #15 // =0xf |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w8 |
| ; VBITS_GE_512-NEXT: ldrh w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB34_19: // %else58 |
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB34_20: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB34_6 |
| ; VBITS_GE_512-NEXT: .LBB34_21: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB34_7 |
| ; VBITS_GE_512-NEXT: .LBB34_22: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB34_8 |
| ; VBITS_GE_512-NEXT: .LBB34_23: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB34_9 |
| ; VBITS_GE_512-NEXT: .LBB34_24: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB34_10 |
| ; VBITS_GE_512-NEXT: .LBB34_25: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w9, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #8, .LBB34_11 |
| ; VBITS_GE_512-NEXT: .LBB34_26: // %cond.load29 |
| ; VBITS_GE_512-NEXT: mov w9, #8 // =0x8 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #9, .LBB34_12 |
| ; VBITS_GE_512-NEXT: .LBB34_27: // %cond.load33 |
| ; VBITS_GE_512-NEXT: mov w9, #9 // =0x9 |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #10, .LBB34_13 |
| ; VBITS_GE_512-NEXT: .LBB34_28: // %cond.load37 |
| ; VBITS_GE_512-NEXT: mov w9, #10 // =0xa |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #11, .LBB34_14 |
| ; VBITS_GE_512-NEXT: .LBB34_29: // %cond.load41 |
| ; VBITS_GE_512-NEXT: mov w9, #11 // =0xb |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #12, .LBB34_15 |
| ; VBITS_GE_512-NEXT: .LBB34_30: // %cond.load45 |
| ; VBITS_GE_512-NEXT: mov w9, #12 // =0xc |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #13, .LBB34_16 |
| ; VBITS_GE_512-NEXT: .LBB34_31: // %cond.load49 |
| ; VBITS_GE_512-NEXT: mov w9, #13 // =0xd |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #14, .LBB34_17 |
| ; VBITS_GE_512-NEXT: .LBB34_32: // %cond.load53 |
| ; VBITS_GE_512-NEXT: mov w9, #14 // =0xe |
| ; VBITS_GE_512-NEXT: index z1.h, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.h, w9 |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h |
| ; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB34_18 |
| ; VBITS_GE_512-NEXT: b .LBB34_19 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v16i16i32_m32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.h, vl16 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h |
| ; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b |
| ; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0] |
| ; CHECK-EXPAND-NEXT: sunpklo z0.h, z1.b |
| ; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h |
| ; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <16 x i32>, ptr %bp |
| %mask = icmp eq <16 x i32> %b, zeroinitializer |
| %load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison) |
| %ext = zext <16 x i16> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding load of <8 x i16> from %ap, zero-extended to <8 x i64> and stored |
| ; to %c. The mask comes from comparing the <8 x i64> at %bp against zero. |
| ; - The runs without sve2p2 expect a scalarized sequence: the mask bits are |
| ;   gathered into a general-purpose register and tested one at a time with |
| ;   tbz/tbnz; each set bit does one halfword load from x0, post-incremented |
| ;   by 2 for every lane except the last. |
| ; - The sve2p2 run expects cntp + whilelo to build a contiguous-load predicate, |
| ;   one ld1h, and then an "expand" under the original mask predicate. |
| ; - With 256-bit vectors the result is stored as two halves (x2 and x2 + 4 |
| ;   doublewords); with 512-bit or wider vectors it is a single st1d. |
| ; The assertions in this function are autogenerated (see the NOTE on the first |
| ; line of the file); regenerate them with the update script instead of editing |
| ; them by hand. |
| define void @masked_load_zext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB35_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_256-NEXT: fmov s0, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB35_3 |
| ; VBITS_GE_256-NEXT: b .LBB35_4 |
| ; VBITS_GE_256-NEXT: .LBB35_2: |
| ; VBITS_GE_256-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB35_4 |
| ; VBITS_GE_256-NEXT: .LBB35_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_256-NEXT: .LBB35_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB35_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB35_13 |
| ; VBITS_GE_256-NEXT: .LBB35_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB35_14 |
| ; VBITS_GE_256-NEXT: .LBB35_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB35_15 |
| ; VBITS_GE_256-NEXT: .LBB35_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB35_16 |
| ; VBITS_GE_256-NEXT: .LBB35_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB35_11 |
| ; VBITS_GE_256-NEXT: .LBB35_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_256-NEXT: .LBB35_11: // %else26 |
| ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h |
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB35_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB35_6 |
| ; VBITS_GE_256-NEXT: .LBB35_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB35_7 |
| ; VBITS_GE_256-NEXT: .LBB35_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB35_8 |
| ; VBITS_GE_256-NEXT: .LBB35_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB35_9 |
| ; VBITS_GE_256-NEXT: .LBB35_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB35_10 |
| ; VBITS_GE_256-NEXT: b .LBB35_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i16i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB35_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ldrh w9, [x0], #2 |
| ; VBITS_GE_512-NEXT: fmov s0, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB35_3 |
| ; VBITS_GE_512-NEXT: b .LBB35_4 |
| ; VBITS_GE_512-NEXT: .LBB35_2: |
| ; VBITS_GE_512-NEXT: // implicit-def: $q0 |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB35_4 |
| ; VBITS_GE_512-NEXT: .LBB35_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2 |
| ; VBITS_GE_512-NEXT: .LBB35_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB35_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB35_13 |
| ; VBITS_GE_512-NEXT: .LBB35_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB35_14 |
| ; VBITS_GE_512-NEXT: .LBB35_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB35_15 |
| ; VBITS_GE_512-NEXT: .LBB35_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB35_16 |
| ; VBITS_GE_512-NEXT: .LBB35_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB35_11 |
| ; VBITS_GE_512-NEXT: .LBB35_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0] |
| ; VBITS_GE_512-NEXT: .LBB35_11: // %else26 |
| ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h |
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB35_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB35_6 |
| ; VBITS_GE_512-NEXT: .LBB35_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB35_7 |
| ; VBITS_GE_512-NEXT: .LBB35_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB35_8 |
| ; VBITS_GE_512-NEXT: .LBB35_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB35_9 |
| ; VBITS_GE_512-NEXT: .LBB35_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB35_10 |
| ; VBITS_GE_512-NEXT: b .LBB35_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i16i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.h, vl8 |
| ; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h |
| ; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h |
| ; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h |
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Mask lane i is true when element i of the <8 x i64> loaded from %bp is zero. |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| ; The pass-through operand is poison, so lanes with a false mask bit carry no |
| ; defined value into the zext and store below. |
| %load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison) |
| %ext = zext <8 x i16> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64_m64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 |
| ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s |
| ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p1.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB36_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p1/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB36_3 |
| ; VBITS_GE_256-NEXT: b .LBB36_4 |
| ; VBITS_GE_256-NEXT: .LBB36_2: |
| ; VBITS_GE_256-NEXT: ptrue p2.s, vl8 |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI36_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI36_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB36_4 |
| ; VBITS_GE_256-NEXT: .LBB36_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB36_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB36_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB36_13 |
| ; VBITS_GE_256-NEXT: .LBB36_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB36_14 |
| ; VBITS_GE_256-NEXT: .LBB36_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB36_15 |
| ; VBITS_GE_256-NEXT: .LBB36_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB36_16 |
| ; VBITS_GE_256-NEXT: .LBB36_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB36_11 |
| ; VBITS_GE_256-NEXT: .LBB36_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB36_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB36_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB36_6 |
| ; VBITS_GE_256-NEXT: .LBB36_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB36_7 |
| ; VBITS_GE_256-NEXT: .LBB36_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB36_8 |
| ; VBITS_GE_256-NEXT: .LBB36_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB36_9 |
| ; VBITS_GE_256-NEXT: .LBB36_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB36_10 |
| ; VBITS_GE_256-NEXT: b .LBB36_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_v8i32i64_m64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p1.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB36_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB36_3 |
| ; VBITS_GE_512-NEXT: b .LBB36_4 |
| ; VBITS_GE_512-NEXT: .LBB36_2: |
| ; VBITS_GE_512-NEXT: ptrue p2.s, vl8 |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI36_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI36_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p2/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB36_4 |
| ; VBITS_GE_512-NEXT: .LBB36_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB36_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB36_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB36_13 |
| ; VBITS_GE_512-NEXT: .LBB36_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB36_14 |
| ; VBITS_GE_512-NEXT: .LBB36_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB36_15 |
| ; VBITS_GE_512-NEXT: .LBB36_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB36_16 |
| ; VBITS_GE_512-NEXT: .LBB36_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB36_11 |
| ; VBITS_GE_512-NEXT: .LBB36_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB36_11: // %else26 |
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB36_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB36_6 |
| ; VBITS_GE_512-NEXT: .LBB36_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB36_7 |
| ; VBITS_GE_512-NEXT: .LBB36_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB36_8 |
| ; VBITS_GE_512-NEXT: .LBB36_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB36_9 |
| ; VBITS_GE_512-NEXT: .LBB36_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB36_10 |
| ; VBITS_GE_512-NEXT: b .LBB36_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v8i32i64_m64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0 |
| ; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl4 |
| ; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s |
| ; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s |
| ; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s } |
| ; CHECK-EXPAND-NEXT: ptrue p1.s, vl8 |
| ; CHECK-EXPAND-NEXT: cmpne p2.s, p1/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s |
| ; CHECK-EXPAND-NEXT: whilelo p1.s, xzr, x9 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p1/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <8 x i64>, ptr %bp |
| %mask = icmp eq <8 x i64> %b, zeroinitializer |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| %ext = zext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v128i8i16(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v128i8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ptrue p1.b, vl128 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w9, v1.b[1] |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: mov z0.b, z1.b[18] |
| ; CHECK-NEXT: umov w10, v1.b[7] |
| ; CHECK-NEXT: umov w11, v1.b[8] |
| ; CHECK-NEXT: mov z2.b, z1.b[19] |
| ; CHECK-NEXT: umov w12, v1.b[2] |
| ; CHECK-NEXT: umov w14, v1.b[9] |
| ; CHECK-NEXT: umov w13, v1.b[3] |
| ; CHECK-NEXT: and x8, x8, #0x1 |
| ; CHECK-NEXT: fmov w16, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[20] |
| ; CHECK-NEXT: bfi x8, x9, #1, #1 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: umov w15, v1.b[10] |
| ; CHECK-NEXT: ubfiz x10, x10, #7, #1 |
| ; CHECK-NEXT: ubfiz x11, x11, #8, #1 |
| ; CHECK-NEXT: mov z2.b, z1.b[21] |
| ; CHECK-NEXT: bfi x8, x12, #2, #1 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: ubfiz x16, x16, #18, #1 |
| ; CHECK-NEXT: ubfiz x9, x9, #19, #1 |
| ; CHECK-NEXT: ubfiz x14, x14, #9, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: umov w11, v1.b[11] |
| ; CHECK-NEXT: mov z0.b, z1.b[22] |
| ; CHECK-NEXT: ubfiz x15, x15, #10, #1 |
| ; CHECK-NEXT: ubfiz x12, x12, #20, #1 |
| ; CHECK-NEXT: orr x9, x16, x9 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: fmov w14, s2 |
| ; CHECK-NEXT: bfi x8, x13, #3, #1 |
| ; CHECK-NEXT: orr x10, x10, x15 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: umov w12, v1.b[12] |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: ubfiz x11, x11, #11, #1 |
| ; CHECK-NEXT: umov w15, v1.b[13] |
| ; CHECK-NEXT: mov z0.b, z1.b[16] |
| ; CHECK-NEXT: ubfiz x14, x14, #21, #1 |
| ; CHECK-NEXT: mov z2.b, z1.b[17] |
| ; CHECK-NEXT: umov w16, v1.b[4] |
| ; CHECK-NEXT: ubfiz x13, x13, #22, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: umov w11, v1.b[14] |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: ubfiz x12, x12, #12, #1 |
| ; CHECK-NEXT: umov w14, v1.b[5] |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: umov w13, v1.b[15] |
| ; CHECK-NEXT: ubfiz x15, x15, #13, #1 |
| ; CHECK-NEXT: orr x10, x10, x12 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[23] |
| ; CHECK-NEXT: ubfiz x11, x11, #14, #1 |
| ; CHECK-NEXT: orr x10, x10, x15 |
| ; CHECK-NEXT: fmov w15, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[24] |
| ; CHECK-NEXT: bfi x8, x16, #4, #1 |
| ; CHECK-NEXT: umov w16, v1.b[6] |
| ; CHECK-NEXT: ubfiz x13, x13, #15, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: fmov w11, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[25] |
| ; CHECK-NEXT: ubfiz x12, x12, #16, #1 |
| ; CHECK-NEXT: bfi x8, x14, #5, #1 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[26] |
| ; CHECK-NEXT: ubfiz x11, x11, #23, #1 |
| ; CHECK-NEXT: orr x10, x10, x12 |
| ; CHECK-NEXT: ubfiz x14, x15, #17, #1 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[27] |
| ; CHECK-NEXT: bfi x8, x16, #6, #1 |
| ; CHECK-NEXT: ubfiz x13, x13, #24, #1 |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: fmov w11, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[28] |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x12, x12, #25, #1 |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[29] |
| ; CHECK-NEXT: ubfiz x11, x11, #26, #1 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: fmov w12, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[30] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x11, x13, #27, #1 |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[31] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x12, x12, #28, #1 |
| ; CHECK-NEXT: ubfiz x11, x13, #29, #1 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: mov z2.b, z1.b[32] |
| ; CHECK-NEXT: fmov w10, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[33] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x12, x13, #30, #1 |
| ; CHECK-NEXT: lsl w10, w10, #31 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: orr x8, x8, x9 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[34] |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[63] |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[62] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB37_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB37_3 |
| ; CHECK-NEXT: b .LBB37_4 |
| ; CHECK-NEXT: .LBB37_2: |
| ; CHECK-NEXT: adrp x9, .LCPI37_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI37_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB37_4 |
| ; CHECK-NEXT: .LBB37_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB37_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB37_181 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB37_182 |
| ; CHECK-NEXT: .LBB37_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB37_183 |
| ; CHECK-NEXT: .LBB37_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB37_184 |
| ; CHECK-NEXT: .LBB37_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB37_185 |
| ; CHECK-NEXT: .LBB37_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB37_186 |
| ; CHECK-NEXT: .LBB37_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB37_187 |
| ; CHECK-NEXT: .LBB37_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB37_188 |
| ; CHECK-NEXT: .LBB37_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB37_189 |
| ; CHECK-NEXT: .LBB37_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB37_190 |
| ; CHECK-NEXT: .LBB37_14: // %else42 |
| ; CHECK-NEXT: tbz w8, #12, .LBB37_16 |
| ; CHECK-NEXT: .LBB37_15: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB37_16: // %else46 |
| ; CHECK-NEXT: mov w12, #71 // =0x47 |
| ; CHECK-NEXT: mov w14, #72 // =0x48 |
| ; CHECK-NEXT: mov w9, #83 // =0x53 |
| ; CHECK-NEXT: mov w10, #84 // =0x54 |
| ; CHECK-NEXT: tbz w8, #13, .LBB37_18 |
| ; CHECK-NEXT: // %bb.17: // %cond.load49 |
| ; CHECK-NEXT: mov w11, #13 // =0xd |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w11 |
| ; CHECK-NEXT: .LBB37_18: // %else50 |
| ; CHECK-NEXT: mov w11, #73 // =0x49 |
| ; CHECK-NEXT: mov w13, #85 // =0x55 |
| ; CHECK-NEXT: tbz w8, #14, .LBB37_20 |
| ; CHECK-NEXT: // %bb.19: // %cond.load53 |
| ; CHECK-NEXT: mov w15, #14 // =0xe |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w15 |
| ; CHECK-NEXT: ldrb w15, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w15 |
| ; CHECK-NEXT: .LBB37_20: // %else54 |
| ; CHECK-NEXT: whilels p3.b, xzr, x12 |
| ; CHECK-NEXT: whilels p4.b, xzr, x14 |
| ; CHECK-NEXT: mov w14, #86 // =0x56 |
| ; CHECK-NEXT: whilels p1.b, xzr, x9 |
| ; CHECK-NEXT: mov w9, #74 // =0x4a |
| ; CHECK-NEXT: whilels p2.b, xzr, x10 |
| ; CHECK-NEXT: tbz w8, #15, .LBB37_22 |
| ; CHECK-NEXT: // %bb.21: // %cond.load57 |
| ; CHECK-NEXT: mov w10, #15 // =0xf |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p5/m, w10 |
| ; CHECK-NEXT: .LBB37_22: // %else58 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: mov w1, #75 // =0x4b |
| ; CHECK-NEXT: mov w17, #87 // =0x57 |
| ; CHECK-NEXT: lastb w12, p4, z1.b |
| ; CHECK-NEXT: lastb w15, p1, z1.b |
| ; CHECK-NEXT: lastb w16, p2, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x11 |
| ; CHECK-NEXT: whilels p1.b, xzr, x13 |
| ; CHECK-NEXT: tbz w8, #16, .LBB37_24 |
| ; CHECK-NEXT: // %bb.23: // %cond.load61 |
| ; CHECK-NEXT: mov w11, #16 // =0x10 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w11 |
| ; CHECK-NEXT: .LBB37_24: // %else62 |
| ; CHECK-NEXT: lastb w11, p2, z1.b |
| ; CHECK-NEXT: mov w3, #76 // =0x4c |
| ; CHECK-NEXT: mov w18, #88 // =0x58 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x9 |
| ; CHECK-NEXT: whilels p1.b, xzr, x14 |
| ; CHECK-NEXT: tbz w8, #17, .LBB37_26 |
| ; CHECK-NEXT: // %bb.25: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w9 |
| ; CHECK-NEXT: .LBB37_26: // %else66 |
| ; CHECK-NEXT: lastb w9, p2, z1.b |
| ; CHECK-NEXT: ubfiz x5, x10, #7, #1 |
| ; CHECK-NEXT: ubfiz x7, x12, #8, #1 |
| ; CHECK-NEXT: ubfiz x4, x15, #19, #1 |
| ; CHECK-NEXT: ubfiz x6, x16, #20, #1 |
| ; CHECK-NEXT: mov w15, #89 // =0x59 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x1 |
| ; CHECK-NEXT: mov w1, #77 // =0x4d |
| ; CHECK-NEXT: whilels p1.b, xzr, x17 |
| ; CHECK-NEXT: mov w17, #64 // =0x40 |
| ; CHECK-NEXT: tbz w8, #18, .LBB37_28 |
| ; CHECK-NEXT: // %bb.27: // %cond.load69 |
| ; CHECK-NEXT: mov w10, #18 // =0x12 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w10 |
| ; CHECK-NEXT: .LBB37_28: // %else70 |
| ; CHECK-NEXT: sub sp, sp, #64 |
| ; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: lastb w10, p2, z1.b |
| ; CHECK-NEXT: orr x7, x5, x7 |
| ; CHECK-NEXT: ubfiz x5, x13, #21, #1 |
| ; CHECK-NEXT: mov w16, #65 // =0x41 |
| ; CHECK-NEXT: orr x19, x4, x6 |
| ; CHECK-NEXT: mov w4, #90 // =0x5a |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x3 |
| ; CHECK-NEXT: ubfiz x3, x11, #9, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x18 |
| ; CHECK-NEXT: mov w18, #78 // =0x4e |
| ; CHECK-NEXT: tbz w8, #19, .LBB37_30 |
| ; CHECK-NEXT: // %bb.29: // %cond.load73 |
| ; CHECK-NEXT: mov w11, #19 // =0x13 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w11 |
| ; CHECK-NEXT: .LBB37_30: // %else74 |
| ; CHECK-NEXT: lastb w11, p2, z1.b |
| ; CHECK-NEXT: ubfiz x21, x9, #10, #1 |
| ; CHECK-NEXT: ubfiz x6, x14, #22, #1 |
| ; CHECK-NEXT: orr x7, x7, x3 |
| ; CHECK-NEXT: mov w3, #79 // =0x4f |
| ; CHECK-NEXT: orr x20, x19, x5 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: mov w5, #91 // =0x5b |
| ; CHECK-NEXT: whilels p3.b, xzr, x17 |
| ; CHECK-NEXT: mov w17, #66 // =0x42 |
| ; CHECK-NEXT: whilels p2.b, xzr, x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x15 |
| ; CHECK-NEXT: tbz w8, #20, .LBB37_32 |
| ; CHECK-NEXT: // %bb.31: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w9 |
| ; CHECK-NEXT: .LBB37_32: // %else78 |
| ; CHECK-NEXT: lastb w9, p3, z1.b |
| ; CHECK-NEXT: orr x19, x7, x21 |
| ; CHECK-NEXT: ubfiz x21, x10, #11, #1 |
| ; CHECK-NEXT: ubfiz x7, x12, #23, #1 |
| ; CHECK-NEXT: mov w1, #67 // =0x43 |
| ; CHECK-NEXT: orr x22, x20, x6 |
| ; CHECK-NEXT: lastb w14, p2, z1.b |
| ; CHECK-NEXT: mov w6, #92 // =0x5c |
| ; CHECK-NEXT: lastb w15, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x16 |
| ; CHECK-NEXT: whilels p2.b, xzr, x18 |
| ; CHECK-NEXT: whilels p1.b, xzr, x4 |
| ; CHECK-NEXT: mov w4, #80 // =0x50 |
| ; CHECK-NEXT: tbz w8, #21, .LBB37_34 |
| ; CHECK-NEXT: // %bb.33: // %cond.load81 |
| ; CHECK-NEXT: mov w10, #21 // =0x15 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w10 |
| ; CHECK-NEXT: .LBB37_34: // %else82 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: orr x20, x19, x21 |
| ; CHECK-NEXT: ubfiz x21, x11, #12, #1 |
| ; CHECK-NEXT: ubfiz x19, x13, #24, #1 |
| ; CHECK-NEXT: mov w18, #68 // =0x44 |
| ; CHECK-NEXT: orr x23, x22, x7 |
| ; CHECK-NEXT: lastb w12, p2, z1.b |
| ; CHECK-NEXT: mov w7, #93 // =0x5d |
| ; CHECK-NEXT: lastb w16, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x17 |
| ; CHECK-NEXT: whilels p2.b, xzr, x3 |
| ; CHECK-NEXT: whilels p1.b, xzr, x5 |
| ; CHECK-NEXT: mov w5, #81 // =0x51 |
| ; CHECK-NEXT: tbz w8, #22, .LBB37_36 |
| ; CHECK-NEXT: // %bb.35: // %cond.load85 |
| ; CHECK-NEXT: mov w11, #22 // =0x16 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w11 |
| ; CHECK-NEXT: .LBB37_36: // %else86 |
| ; CHECK-NEXT: lastb w11, p3, z1.b |
| ; CHECK-NEXT: orr x20, x20, x21 |
| ; CHECK-NEXT: ubfiz x21, x14, #13, #1 |
| ; CHECK-NEXT: ubfiz x22, x15, #25, #1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: mov w3, #69 // =0x45 |
| ; CHECK-NEXT: lastb w13, p2, z1.b |
| ; CHECK-NEXT: orr x24, x23, x19 |
| ; CHECK-NEXT: mov w19, #94 // =0x5e |
| ; CHECK-NEXT: lastb w17, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x1 |
| ; CHECK-NEXT: whilels p2.b, xzr, x4 |
| ; CHECK-NEXT: mov w4, #82 // =0x52 |
| ; CHECK-NEXT: whilels p1.b, xzr, x6 |
| ; CHECK-NEXT: mov w6, #95 // =0x5f |
| ; CHECK-NEXT: tbz w8, #23, .LBB37_38 |
| ; CHECK-NEXT: // %bb.37: // %cond.load89 |
| ; CHECK-NEXT: mov w14, #23 // =0x17 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w14 |
| ; CHECK-NEXT: .LBB37_38: // %else90 |
| ; CHECK-NEXT: lastb w14, p3, z1.b |
| ; CHECK-NEXT: bfi x9, x10, #1, #1 |
| ; CHECK-NEXT: ubfiz x23, x16, #26, #1 |
| ; CHECK-NEXT: lastb w15, p2, z1.b |
| ; CHECK-NEXT: lastb w1, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x18 |
| ; CHECK-NEXT: whilels p2.b, xzr, x5 |
| ; CHECK-NEXT: ubfiz x5, x12, #14, #1 |
| ; CHECK-NEXT: mov w12, #70 // =0x46 |
| ; CHECK-NEXT: whilels p1.b, xzr, x7 |
| ; CHECK-NEXT: orr x7, x20, x21 |
| ; CHECK-NEXT: orr x20, x24, x22 |
| ; CHECK-NEXT: tbz w8, #24, .LBB37_40 |
| ; CHECK-NEXT: // %bb.39: // %cond.load93 |
| ; CHECK-NEXT: mov w10, #24 // =0x18 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w10 |
| ; CHECK-NEXT: .LBB37_40: // %else94 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: bfi x9, x11, #2, #1 |
| ; CHECK-NEXT: orr x5, x7, x5 |
| ; CHECK-NEXT: lastb w16, p2, z1.b |
| ; CHECK-NEXT: lastb w18, p1, z1.b |
| ; CHECK-NEXT: whilels p4.b, xzr, x3 |
| ; CHECK-NEXT: ubfiz x3, x13, #15, #1 |
| ; CHECK-NEXT: whilels p2.b, xzr, x4 |
| ; CHECK-NEXT: ubfiz x4, x17, #27, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x19 |
| ; CHECK-NEXT: whilels p3.b, xzr, x6 |
| ; CHECK-NEXT: orr x6, x20, x23 |
| ; CHECK-NEXT: tbz w8, #25, .LBB37_42 |
| ; CHECK-NEXT: // %bb.41: // %cond.load97 |
| ; CHECK-NEXT: mov w11, #25 // =0x19 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p5/m, w11 |
| ; CHECK-NEXT: .LBB37_42: // %else98 |
| ; CHECK-NEXT: lastb w11, p4, z1.b |
| ; CHECK-NEXT: bfi x9, x14, #3, #1 |
| ; CHECK-NEXT: ubfiz x15, x15, #16, #1 |
| ; CHECK-NEXT: ubfiz x1, x1, #28, #1 |
| ; CHECK-NEXT: orr x3, x5, x3 |
| ; CHECK-NEXT: orr x4, x6, x4 |
| ; CHECK-NEXT: lastb w13, p2, z1.b |
| ; CHECK-NEXT: mov w14, #96 // =0x60 |
| ; CHECK-NEXT: lastb w17, p1, z1.b |
| ; CHECK-NEXT: lastb w7, p3, z1.b |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: tbz w8, #26, .LBB37_44 |
| ; CHECK-NEXT: // %bb.43: // %cond.load101 |
| ; CHECK-NEXT: mov w12, #26 // =0x1a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w12 |
| ; CHECK-NEXT: .LBB37_44: // %else102 |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: bfi x9, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x16, x16, #17, #1 |
| ; CHECK-NEXT: ubfiz x18, x18, #29, #1 |
| ; CHECK-NEXT: lsl w10, w7, #31 |
| ; CHECK-NEXT: orr x3, x3, x15 |
| ; CHECK-NEXT: orr x1, x4, x1 |
| ; CHECK-NEXT: mov w15, #97 // =0x61 |
| ; CHECK-NEXT: tbz w8, #27, .LBB37_46 |
| ; CHECK-NEXT: // %bb.45: // %cond.load105 |
| ; CHECK-NEXT: mov w4, #27 // =0x1b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w4 |
| ; CHECK-NEXT: ldrb w4, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w4 |
| ; CHECK-NEXT: .LBB37_46: // %else106 |
| ; CHECK-NEXT: bfi x9, x11, #5, #1 |
| ; CHECK-NEXT: ubfiz x13, x13, #18, #1 |
| ; CHECK-NEXT: ubfiz x17, x17, #30, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x14 |
| ; CHECK-NEXT: orr x16, x3, x16 |
| ; CHECK-NEXT: orr x18, x1, x18 |
| ; CHECK-NEXT: mov w11, #98 // =0x62 |
| ; CHECK-NEXT: tbz w8, #28, .LBB37_48 |
| ; CHECK-NEXT: // %bb.47: // %cond.load109 |
| ; CHECK-NEXT: mov w14, #28 // =0x1c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_48: // %else110 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: bfi x9, x12, #6, #1 |
| ; CHECK-NEXT: mov w12, #99 // =0x63 |
| ; CHECK-NEXT: whilels p1.b, xzr, x15 |
| ; CHECK-NEXT: orr x15, x16, x13 |
| ; CHECK-NEXT: orr x16, x18, x17 |
| ; CHECK-NEXT: tbz w8, #29, .LBB37_50 |
| ; CHECK-NEXT: // %bb.49: // %cond.load113 |
| ; CHECK-NEXT: mov w13, #29 // =0x1d |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_50: // %else114 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x9, x15 |
| ; CHECK-NEXT: orr x10, x16, x10 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #100 // =0x64 |
| ; CHECK-NEXT: tbz w8, #30, .LBB37_52 |
| ; CHECK-NEXT: // %bb.51: // %cond.load117 |
| ; CHECK-NEXT: mov w14, #30 // =0x1e |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_52: // %else118 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #101 // =0x65 |
| ; CHECK-NEXT: tbz w8, #31, .LBB37_54 |
| ; CHECK-NEXT: // %bb.53: // %cond.load121 |
| ; CHECK-NEXT: mov w13, #31 // =0x1f |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_54: // %else122 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #32 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #102 // =0x66 |
| ; CHECK-NEXT: tbz x8, #32, .LBB37_56 |
| ; CHECK-NEXT: // %bb.55: // %cond.load125 |
| ; CHECK-NEXT: mov w14, #32 // =0x20 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_56: // %else126 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #33 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #103 // =0x67 |
| ; CHECK-NEXT: tbz x8, #33, .LBB37_58 |
| ; CHECK-NEXT: // %bb.57: // %cond.load129 |
| ; CHECK-NEXT: mov w13, #33 // =0x21 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_58: // %else130 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #34 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #104 // =0x68 |
| ; CHECK-NEXT: tbz x8, #34, .LBB37_60 |
| ; CHECK-NEXT: // %bb.59: // %cond.load133 |
| ; CHECK-NEXT: mov w14, #34 // =0x22 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_60: // %else134 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #35 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #105 // =0x69 |
| ; CHECK-NEXT: tbz x8, #35, .LBB37_62 |
| ; CHECK-NEXT: // %bb.61: // %cond.load137 |
| ; CHECK-NEXT: mov w13, #35 // =0x23 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_62: // %else138 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #36 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #106 // =0x6a |
| ; CHECK-NEXT: tbz x8, #36, .LBB37_64 |
| ; CHECK-NEXT: // %bb.63: // %cond.load141 |
| ; CHECK-NEXT: mov w14, #36 // =0x24 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_64: // %else142 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #37 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #107 // =0x6b |
| ; CHECK-NEXT: tbz x8, #37, .LBB37_66 |
| ; CHECK-NEXT: // %bb.65: // %cond.load145 |
| ; CHECK-NEXT: mov w13, #37 // =0x25 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_66: // %else146 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #38 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #108 // =0x6c |
| ; CHECK-NEXT: tbz x8, #38, .LBB37_68 |
| ; CHECK-NEXT: // %bb.67: // %cond.load149 |
| ; CHECK-NEXT: mov w14, #38 // =0x26 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_68: // %else150 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #39 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #109 // =0x6d |
| ; CHECK-NEXT: tbz x8, #39, .LBB37_70 |
| ; CHECK-NEXT: // %bb.69: // %cond.load153 |
| ; CHECK-NEXT: mov w13, #39 // =0x27 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_70: // %else154 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #40 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #110 // =0x6e |
| ; CHECK-NEXT: tbz x8, #40, .LBB37_72 |
| ; CHECK-NEXT: // %bb.71: // %cond.load157 |
| ; CHECK-NEXT: mov w14, #40 // =0x28 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_72: // %else158 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #41 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #111 // =0x6f |
| ; CHECK-NEXT: tbz x8, #41, .LBB37_74 |
| ; CHECK-NEXT: // %bb.73: // %cond.load161 |
| ; CHECK-NEXT: mov w13, #41 // =0x29 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_74: // %else162 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #42 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #112 // =0x70 |
| ; CHECK-NEXT: tbz x8, #42, .LBB37_76 |
| ; CHECK-NEXT: // %bb.75: // %cond.load165 |
| ; CHECK-NEXT: mov w14, #42 // =0x2a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_76: // %else166 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #43 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #113 // =0x71 |
| ; CHECK-NEXT: tbz x8, #43, .LBB37_78 |
| ; CHECK-NEXT: // %bb.77: // %cond.load169 |
| ; CHECK-NEXT: mov w13, #43 // =0x2b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_78: // %else170 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #44 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #114 // =0x72 |
| ; CHECK-NEXT: tbz x8, #44, .LBB37_80 |
| ; CHECK-NEXT: // %bb.79: // %cond.load173 |
| ; CHECK-NEXT: mov w14, #44 // =0x2c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_80: // %else174 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #45 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #115 // =0x73 |
| ; CHECK-NEXT: tbz x8, #45, .LBB37_82 |
| ; CHECK-NEXT: // %bb.81: // %cond.load177 |
| ; CHECK-NEXT: mov w13, #45 // =0x2d |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_82: // %else178 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #46 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #116 // =0x74 |
| ; CHECK-NEXT: tbz x8, #46, .LBB37_84 |
| ; CHECK-NEXT: // %bb.83: // %cond.load181 |
| ; CHECK-NEXT: mov w14, #46 // =0x2e |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_84: // %else182 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #47 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #117 // =0x75 |
| ; CHECK-NEXT: tbz x8, #47, .LBB37_86 |
| ; CHECK-NEXT: // %bb.85: // %cond.load185 |
| ; CHECK-NEXT: mov w13, #47 // =0x2f |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_86: // %else186 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #48 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #118 // =0x76 |
| ; CHECK-NEXT: tbz x8, #48, .LBB37_88 |
| ; CHECK-NEXT: // %bb.87: // %cond.load189 |
| ; CHECK-NEXT: mov w14, #48 // =0x30 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_88: // %else190 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #49 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #119 // =0x77 |
| ; CHECK-NEXT: tbz x8, #49, .LBB37_90 |
| ; CHECK-NEXT: // %bb.89: // %cond.load193 |
| ; CHECK-NEXT: mov w13, #49 // =0x31 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_90: // %else194 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #50 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #120 // =0x78 |
| ; CHECK-NEXT: tbz x8, #50, .LBB37_92 |
| ; CHECK-NEXT: // %bb.91: // %cond.load197 |
| ; CHECK-NEXT: mov w14, #50 // =0x32 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_92: // %else198 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #51 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #121 // =0x79 |
| ; CHECK-NEXT: tbz x8, #51, .LBB37_94 |
| ; CHECK-NEXT: // %bb.93: // %cond.load201 |
| ; CHECK-NEXT: mov w13, #51 // =0x33 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_94: // %else202 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #52 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #122 // =0x7a |
| ; CHECK-NEXT: tbz x8, #52, .LBB37_96 |
| ; CHECK-NEXT: // %bb.95: // %cond.load205 |
| ; CHECK-NEXT: mov w14, #52 // =0x34 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_96: // %else206 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #53 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #123 // =0x7b |
| ; CHECK-NEXT: tbz x8, #53, .LBB37_98 |
| ; CHECK-NEXT: // %bb.97: // %cond.load209 |
| ; CHECK-NEXT: mov w13, #53 // =0x35 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_98: // %else210 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #54 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #124 // =0x7c |
| ; CHECK-NEXT: tbz x8, #54, .LBB37_100 |
| ; CHECK-NEXT: // %bb.99: // %cond.load213 |
| ; CHECK-NEXT: mov w14, #54 // =0x36 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_100: // %else214 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #55 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #125 // =0x7d |
| ; CHECK-NEXT: tbz x8, #55, .LBB37_102 |
| ; CHECK-NEXT: // %bb.101: // %cond.load217 |
| ; CHECK-NEXT: mov w13, #55 // =0x37 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB37_102: // %else218 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #56 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #126 // =0x7e |
| ; CHECK-NEXT: tbz x8, #56, .LBB37_104 |
| ; CHECK-NEXT: // %bb.103: // %cond.load221 |
| ; CHECK-NEXT: mov w14, #56 // =0x38 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_104: // %else222 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #57 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: tbz x8, #57, .LBB37_106 |
| ; CHECK-NEXT: // %bb.105: // %cond.load225 |
| ; CHECK-NEXT: mov w12, #57 // =0x39 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w12 |
| ; CHECK-NEXT: .LBB37_106: // %else226 |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: orr x13, x15, x9, lsl #58 |
| ; CHECK-NEXT: mov w9, #127 // =0x7f |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: and w11, w14, #0x1 |
| ; CHECK-NEXT: tbz x8, #58, .LBB37_108 |
| ; CHECK-NEXT: // %bb.107: // %cond.load229 |
| ; CHECK-NEXT: mov w14, #58 // =0x3a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB37_108: // %else230 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x13, x13, x10, lsl #59 |
| ; CHECK-NEXT: and w10, w12, #0x1 |
| ; CHECK-NEXT: tbz x8, #59, .LBB37_110 |
| ; CHECK-NEXT: // %bb.109: // %cond.load233 |
| ; CHECK-NEXT: mov w12, #59 // =0x3b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w12 |
| ; CHECK-NEXT: .LBB37_110: // %else234 |
| ; CHECK-NEXT: orr x12, x13, x11, lsl #60 |
| ; CHECK-NEXT: whilels p1.b, xzr, x9 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: tbz x8, #60, .LBB37_112 |
| ; CHECK-NEXT: // %bb.111: // %cond.load237 |
| ; CHECK-NEXT: mov w11, #60 // =0x3c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w11 |
| ; CHECK-NEXT: .LBB37_112: // %else238 |
| ; CHECK-NEXT: lastb w11, p1, z1.b |
| ; CHECK-NEXT: orr x10, x12, x10, lsl #61 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB37_191 |
| ; CHECK-NEXT: // %bb.113: // %else242 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #62 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB37_192 |
| ; CHECK-NEXT: .LBB37_114: // %else246 |
| ; CHECK-NEXT: orr x9, x9, x11, lsl #63 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB37_193 |
| ; CHECK-NEXT: .LBB37_115: // %else250 |
| ; CHECK-NEXT: tbnz w9, #0, .LBB37_194 |
| ; CHECK-NEXT: .LBB37_116: // %else254 |
| ; CHECK-NEXT: tbnz w9, #1, .LBB37_195 |
| ; CHECK-NEXT: .LBB37_117: // %else258 |
| ; CHECK-NEXT: tbnz w9, #2, .LBB37_196 |
| ; CHECK-NEXT: .LBB37_118: // %else262 |
| ; CHECK-NEXT: tbnz w9, #3, .LBB37_197 |
| ; CHECK-NEXT: .LBB37_119: // %else266 |
| ; CHECK-NEXT: tbnz w9, #4, .LBB37_198 |
| ; CHECK-NEXT: .LBB37_120: // %else270 |
| ; CHECK-NEXT: tbnz w9, #5, .LBB37_199 |
| ; CHECK-NEXT: .LBB37_121: // %else274 |
| ; CHECK-NEXT: tbnz w9, #6, .LBB37_200 |
| ; CHECK-NEXT: .LBB37_122: // %else278 |
| ; CHECK-NEXT: tbnz w9, #7, .LBB37_201 |
| ; CHECK-NEXT: .LBB37_123: // %else282 |
| ; CHECK-NEXT: tbnz w9, #8, .LBB37_202 |
| ; CHECK-NEXT: .LBB37_124: // %else286 |
| ; CHECK-NEXT: tbnz w9, #9, .LBB37_203 |
| ; CHECK-NEXT: .LBB37_125: // %else290 |
| ; CHECK-NEXT: tbnz w9, #10, .LBB37_204 |
| ; CHECK-NEXT: .LBB37_126: // %else294 |
| ; CHECK-NEXT: tbnz w9, #11, .LBB37_205 |
| ; CHECK-NEXT: .LBB37_127: // %else298 |
| ; CHECK-NEXT: tbnz w9, #12, .LBB37_206 |
| ; CHECK-NEXT: .LBB37_128: // %else302 |
| ; CHECK-NEXT: tbnz w9, #13, .LBB37_207 |
| ; CHECK-NEXT: .LBB37_129: // %else306 |
| ; CHECK-NEXT: tbnz w9, #14, .LBB37_208 |
| ; CHECK-NEXT: .LBB37_130: // %else310 |
| ; CHECK-NEXT: tbnz w9, #15, .LBB37_209 |
| ; CHECK-NEXT: .LBB37_131: // %else314 |
| ; CHECK-NEXT: tbnz w9, #16, .LBB37_210 |
| ; CHECK-NEXT: .LBB37_132: // %else318 |
| ; CHECK-NEXT: tbnz w9, #17, .LBB37_211 |
| ; CHECK-NEXT: .LBB37_133: // %else322 |
| ; CHECK-NEXT: tbnz w9, #18, .LBB37_212 |
| ; CHECK-NEXT: .LBB37_134: // %else326 |
| ; CHECK-NEXT: tbnz w9, #19, .LBB37_213 |
| ; CHECK-NEXT: .LBB37_135: // %else330 |
| ; CHECK-NEXT: tbnz w9, #20, .LBB37_214 |
| ; CHECK-NEXT: .LBB37_136: // %else334 |
| ; CHECK-NEXT: tbnz w9, #21, .LBB37_215 |
| ; CHECK-NEXT: .LBB37_137: // %else338 |
| ; CHECK-NEXT: tbnz w9, #22, .LBB37_216 |
| ; CHECK-NEXT: .LBB37_138: // %else342 |
| ; CHECK-NEXT: tbnz w9, #23, .LBB37_217 |
| ; CHECK-NEXT: .LBB37_139: // %else346 |
| ; CHECK-NEXT: tbnz w9, #24, .LBB37_218 |
| ; CHECK-NEXT: .LBB37_140: // %else350 |
| ; CHECK-NEXT: tbnz w9, #25, .LBB37_219 |
| ; CHECK-NEXT: .LBB37_141: // %else354 |
| ; CHECK-NEXT: tbnz w9, #26, .LBB37_220 |
| ; CHECK-NEXT: .LBB37_142: // %else358 |
| ; CHECK-NEXT: tbnz w9, #27, .LBB37_221 |
| ; CHECK-NEXT: .LBB37_143: // %else362 |
| ; CHECK-NEXT: tbnz w9, #28, .LBB37_222 |
| ; CHECK-NEXT: .LBB37_144: // %else366 |
| ; CHECK-NEXT: tbnz w9, #29, .LBB37_223 |
| ; CHECK-NEXT: .LBB37_145: // %else370 |
| ; CHECK-NEXT: tbnz w9, #30, .LBB37_224 |
| ; CHECK-NEXT: .LBB37_146: // %else374 |
| ; CHECK-NEXT: tbnz w9, #31, .LBB37_225 |
| ; CHECK-NEXT: .LBB37_147: // %else378 |
| ; CHECK-NEXT: tbnz x9, #32, .LBB37_226 |
| ; CHECK-NEXT: .LBB37_148: // %else382 |
| ; CHECK-NEXT: tbnz x9, #33, .LBB37_227 |
| ; CHECK-NEXT: .LBB37_149: // %else386 |
| ; CHECK-NEXT: tbnz x9, #34, .LBB37_228 |
| ; CHECK-NEXT: .LBB37_150: // %else390 |
| ; CHECK-NEXT: tbnz x9, #35, .LBB37_229 |
| ; CHECK-NEXT: .LBB37_151: // %else394 |
| ; CHECK-NEXT: tbnz x9, #36, .LBB37_230 |
| ; CHECK-NEXT: .LBB37_152: // %else398 |
| ; CHECK-NEXT: tbnz x9, #37, .LBB37_231 |
| ; CHECK-NEXT: .LBB37_153: // %else402 |
| ; CHECK-NEXT: tbnz x9, #38, .LBB37_232 |
| ; CHECK-NEXT: .LBB37_154: // %else406 |
| ; CHECK-NEXT: tbnz x9, #39, .LBB37_233 |
| ; CHECK-NEXT: .LBB37_155: // %else410 |
| ; CHECK-NEXT: tbnz x9, #40, .LBB37_234 |
| ; CHECK-NEXT: .LBB37_156: // %else414 |
| ; CHECK-NEXT: tbnz x9, #41, .LBB37_235 |
| ; CHECK-NEXT: .LBB37_157: // %else418 |
| ; CHECK-NEXT: tbnz x9, #42, .LBB37_236 |
| ; CHECK-NEXT: .LBB37_158: // %else422 |
| ; CHECK-NEXT: tbnz x9, #43, .LBB37_237 |
| ; CHECK-NEXT: .LBB37_159: // %else426 |
| ; CHECK-NEXT: tbnz x9, #44, .LBB37_238 |
| ; CHECK-NEXT: .LBB37_160: // %else430 |
| ; CHECK-NEXT: tbnz x9, #45, .LBB37_239 |
| ; CHECK-NEXT: .LBB37_161: // %else434 |
| ; CHECK-NEXT: tbnz x9, #46, .LBB37_240 |
| ; CHECK-NEXT: .LBB37_162: // %else438 |
| ; CHECK-NEXT: tbnz x9, #47, .LBB37_241 |
| ; CHECK-NEXT: .LBB37_163: // %else442 |
| ; CHECK-NEXT: tbnz x9, #48, .LBB37_242 |
| ; CHECK-NEXT: .LBB37_164: // %else446 |
| ; CHECK-NEXT: tbnz x9, #49, .LBB37_243 |
| ; CHECK-NEXT: .LBB37_165: // %else450 |
| ; CHECK-NEXT: tbnz x9, #50, .LBB37_244 |
| ; CHECK-NEXT: .LBB37_166: // %else454 |
| ; CHECK-NEXT: tbnz x9, #51, .LBB37_245 |
| ; CHECK-NEXT: .LBB37_167: // %else458 |
| ; CHECK-NEXT: tbnz x9, #52, .LBB37_246 |
| ; CHECK-NEXT: .LBB37_168: // %else462 |
| ; CHECK-NEXT: tbnz x9, #53, .LBB37_247 |
| ; CHECK-NEXT: .LBB37_169: // %else466 |
| ; CHECK-NEXT: tbnz x9, #54, .LBB37_248 |
| ; CHECK-NEXT: .LBB37_170: // %else470 |
| ; CHECK-NEXT: tbnz x9, #55, .LBB37_249 |
| ; CHECK-NEXT: .LBB37_171: // %else474 |
| ; CHECK-NEXT: tbnz x9, #56, .LBB37_250 |
| ; CHECK-NEXT: .LBB37_172: // %else478 |
| ; CHECK-NEXT: tbnz x9, #57, .LBB37_251 |
| ; CHECK-NEXT: .LBB37_173: // %else482 |
| ; CHECK-NEXT: tbnz x9, #58, .LBB37_252 |
| ; CHECK-NEXT: .LBB37_174: // %else486 |
| ; CHECK-NEXT: tbnz x9, #59, .LBB37_253 |
| ; CHECK-NEXT: .LBB37_175: // %else490 |
| ; CHECK-NEXT: tbnz x9, #60, .LBB37_254 |
| ; CHECK-NEXT: .LBB37_176: // %else494 |
| ; CHECK-NEXT: tbnz x9, #61, .LBB37_255 |
| ; CHECK-NEXT: .LBB37_177: // %else498 |
| ; CHECK-NEXT: tbnz x9, #62, .LBB37_256 |
| ; CHECK-NEXT: .LBB37_178: // %else502 |
| ; CHECK-NEXT: tbz x9, #63, .LBB37_180 |
| ; CHECK-NEXT: .LBB37_179: // %cond.load505 |
| ; CHECK-NEXT: mov w8, #127 // =0x7f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB37_180: // %else506 |
| ; CHECK-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.h, vl128 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #64 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB37_181: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB37_6 |
| ; CHECK-NEXT: .LBB37_182: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB37_7 |
| ; CHECK-NEXT: .LBB37_183: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB37_8 |
| ; CHECK-NEXT: .LBB37_184: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB37_9 |
| ; CHECK-NEXT: .LBB37_185: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB37_10 |
| ; CHECK-NEXT: .LBB37_186: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB37_11 |
| ; CHECK-NEXT: .LBB37_187: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB37_12 |
| ; CHECK-NEXT: .LBB37_188: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB37_13 |
| ; CHECK-NEXT: .LBB37_189: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB37_14 |
| ; CHECK-NEXT: .LBB37_190: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB37_15 |
| ; CHECK-NEXT: b .LBB37_16 |
| ; CHECK-NEXT: .LBB37_191: // %cond.load241 |
| ; CHECK-NEXT: mov w12, #61 // =0x3d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w12 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #62 |
| ; CHECK-NEXT: tbz x8, #62, .LBB37_114 |
| ; CHECK-NEXT: .LBB37_192: // %cond.load245 |
| ; CHECK-NEXT: mov w10, #62 // =0x3e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w10 |
| ; CHECK-NEXT: orr x9, x9, x11, lsl #63 |
| ; CHECK-NEXT: tbz x8, #63, .LBB37_115 |
| ; CHECK-NEXT: .LBB37_193: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #0, .LBB37_116 |
| ; CHECK-NEXT: .LBB37_194: // %cond.load253 |
| ; CHECK-NEXT: mov w8, #64 // =0x40 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #1, .LBB37_117 |
| ; CHECK-NEXT: .LBB37_195: // %cond.load257 |
| ; CHECK-NEXT: mov w8, #65 // =0x41 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #2, .LBB37_118 |
| ; CHECK-NEXT: .LBB37_196: // %cond.load261 |
| ; CHECK-NEXT: mov w8, #66 // =0x42 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #3, .LBB37_119 |
| ; CHECK-NEXT: .LBB37_197: // %cond.load265 |
| ; CHECK-NEXT: mov w8, #67 // =0x43 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #4, .LBB37_120 |
| ; CHECK-NEXT: .LBB37_198: // %cond.load269 |
| ; CHECK-NEXT: mov w8, #68 // =0x44 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #5, .LBB37_121 |
| ; CHECK-NEXT: .LBB37_199: // %cond.load273 |
| ; CHECK-NEXT: mov w8, #69 // =0x45 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #6, .LBB37_122 |
| ; CHECK-NEXT: .LBB37_200: // %cond.load277 |
| ; CHECK-NEXT: mov w8, #70 // =0x46 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #7, .LBB37_123 |
| ; CHECK-NEXT: .LBB37_201: // %cond.load281 |
| ; CHECK-NEXT: mov w8, #71 // =0x47 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #8, .LBB37_124 |
| ; CHECK-NEXT: .LBB37_202: // %cond.load285 |
| ; CHECK-NEXT: mov w8, #72 // =0x48 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #9, .LBB37_125 |
| ; CHECK-NEXT: .LBB37_203: // %cond.load289 |
| ; CHECK-NEXT: mov w8, #73 // =0x49 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #10, .LBB37_126 |
| ; CHECK-NEXT: .LBB37_204: // %cond.load293 |
| ; CHECK-NEXT: mov w8, #74 // =0x4a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #11, .LBB37_127 |
| ; CHECK-NEXT: .LBB37_205: // %cond.load297 |
| ; CHECK-NEXT: mov w8, #75 // =0x4b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #12, .LBB37_128 |
| ; CHECK-NEXT: .LBB37_206: // %cond.load301 |
| ; CHECK-NEXT: mov w8, #76 // =0x4c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #13, .LBB37_129 |
| ; CHECK-NEXT: .LBB37_207: // %cond.load305 |
| ; CHECK-NEXT: mov w8, #77 // =0x4d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #14, .LBB37_130 |
| ; CHECK-NEXT: .LBB37_208: // %cond.load309 |
| ; CHECK-NEXT: mov w8, #78 // =0x4e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #15, .LBB37_131 |
| ; CHECK-NEXT: .LBB37_209: // %cond.load313 |
| ; CHECK-NEXT: mov w8, #79 // =0x4f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #16, .LBB37_132 |
| ; CHECK-NEXT: .LBB37_210: // %cond.load317 |
| ; CHECK-NEXT: mov w8, #80 // =0x50 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #17, .LBB37_133 |
| ; CHECK-NEXT: .LBB37_211: // %cond.load321 |
| ; CHECK-NEXT: mov w8, #81 // =0x51 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #18, .LBB37_134 |
| ; CHECK-NEXT: .LBB37_212: // %cond.load325 |
| ; CHECK-NEXT: mov w8, #82 // =0x52 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #19, .LBB37_135 |
| ; CHECK-NEXT: .LBB37_213: // %cond.load329 |
| ; CHECK-NEXT: mov w8, #83 // =0x53 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #20, .LBB37_136 |
| ; CHECK-NEXT: .LBB37_214: // %cond.load333 |
| ; CHECK-NEXT: mov w8, #84 // =0x54 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #21, .LBB37_137 |
| ; CHECK-NEXT: .LBB37_215: // %cond.load337 |
| ; CHECK-NEXT: mov w8, #85 // =0x55 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #22, .LBB37_138 |
| ; CHECK-NEXT: .LBB37_216: // %cond.load341 |
| ; CHECK-NEXT: mov w8, #86 // =0x56 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #23, .LBB37_139 |
| ; CHECK-NEXT: .LBB37_217: // %cond.load345 |
| ; CHECK-NEXT: mov w8, #87 // =0x57 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #24, .LBB37_140 |
| ; CHECK-NEXT: .LBB37_218: // %cond.load349 |
| ; CHECK-NEXT: mov w8, #88 // =0x58 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #25, .LBB37_141 |
| ; CHECK-NEXT: .LBB37_219: // %cond.load353 |
| ; CHECK-NEXT: mov w8, #89 // =0x59 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #26, .LBB37_142 |
| ; CHECK-NEXT: .LBB37_220: // %cond.load357 |
| ; CHECK-NEXT: mov w8, #90 // =0x5a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #27, .LBB37_143 |
| ; CHECK-NEXT: .LBB37_221: // %cond.load361 |
| ; CHECK-NEXT: mov w8, #91 // =0x5b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #28, .LBB37_144 |
| ; CHECK-NEXT: .LBB37_222: // %cond.load365 |
| ; CHECK-NEXT: mov w8, #92 // =0x5c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #29, .LBB37_145 |
| ; CHECK-NEXT: .LBB37_223: // %cond.load369 |
| ; CHECK-NEXT: mov w8, #93 // =0x5d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #30, .LBB37_146 |
| ; CHECK-NEXT: .LBB37_224: // %cond.load373 |
| ; CHECK-NEXT: mov w8, #94 // =0x5e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #31, .LBB37_147 |
| ; CHECK-NEXT: .LBB37_225: // %cond.load377 |
| ; CHECK-NEXT: mov w8, #95 // =0x5f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #32, .LBB37_148 |
| ; CHECK-NEXT: .LBB37_226: // %cond.load381 |
| ; CHECK-NEXT: mov w8, #96 // =0x60 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #33, .LBB37_149 |
| ; CHECK-NEXT: .LBB37_227: // %cond.load385 |
| ; CHECK-NEXT: mov w8, #97 // =0x61 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #34, .LBB37_150 |
| ; CHECK-NEXT: .LBB37_228: // %cond.load389 |
| ; CHECK-NEXT: mov w8, #98 // =0x62 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #35, .LBB37_151 |
| ; CHECK-NEXT: .LBB37_229: // %cond.load393 |
| ; CHECK-NEXT: mov w8, #99 // =0x63 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #36, .LBB37_152 |
| ; CHECK-NEXT: .LBB37_230: // %cond.load397 |
| ; CHECK-NEXT: mov w8, #100 // =0x64 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #37, .LBB37_153 |
| ; CHECK-NEXT: .LBB37_231: // %cond.load401 |
| ; CHECK-NEXT: mov w8, #101 // =0x65 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #38, .LBB37_154 |
| ; CHECK-NEXT: .LBB37_232: // %cond.load405 |
| ; CHECK-NEXT: mov w8, #102 // =0x66 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #39, .LBB37_155 |
| ; CHECK-NEXT: .LBB37_233: // %cond.load409 |
| ; CHECK-NEXT: mov w8, #103 // =0x67 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #40, .LBB37_156 |
| ; CHECK-NEXT: .LBB37_234: // %cond.load413 |
| ; CHECK-NEXT: mov w8, #104 // =0x68 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #41, .LBB37_157 |
| ; CHECK-NEXT: .LBB37_235: // %cond.load417 |
| ; CHECK-NEXT: mov w8, #105 // =0x69 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #42, .LBB37_158 |
| ; CHECK-NEXT: .LBB37_236: // %cond.load421 |
| ; CHECK-NEXT: mov w8, #106 // =0x6a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #43, .LBB37_159 |
| ; CHECK-NEXT: .LBB37_237: // %cond.load425 |
| ; CHECK-NEXT: mov w8, #107 // =0x6b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #44, .LBB37_160 |
| ; CHECK-NEXT: .LBB37_238: // %cond.load429 |
| ; CHECK-NEXT: mov w8, #108 // =0x6c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #45, .LBB37_161 |
| ; CHECK-NEXT: .LBB37_239: // %cond.load433 |
| ; CHECK-NEXT: mov w8, #109 // =0x6d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #46, .LBB37_162 |
| ; CHECK-NEXT: .LBB37_240: // %cond.load437 |
| ; CHECK-NEXT: mov w8, #110 // =0x6e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #47, .LBB37_163 |
| ; CHECK-NEXT: .LBB37_241: // %cond.load441 |
| ; CHECK-NEXT: mov w8, #111 // =0x6f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #48, .LBB37_164 |
| ; CHECK-NEXT: .LBB37_242: // %cond.load445 |
| ; CHECK-NEXT: mov w8, #112 // =0x70 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #49, .LBB37_165 |
| ; CHECK-NEXT: .LBB37_243: // %cond.load449 |
| ; CHECK-NEXT: mov w8, #113 // =0x71 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #50, .LBB37_166 |
| ; CHECK-NEXT: .LBB37_244: // %cond.load453 |
| ; CHECK-NEXT: mov w8, #114 // =0x72 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #51, .LBB37_167 |
| ; CHECK-NEXT: .LBB37_245: // %cond.load457 |
| ; CHECK-NEXT: mov w8, #115 // =0x73 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #52, .LBB37_168 |
| ; CHECK-NEXT: .LBB37_246: // %cond.load461 |
| ; CHECK-NEXT: mov w8, #116 // =0x74 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #53, .LBB37_169 |
| ; CHECK-NEXT: .LBB37_247: // %cond.load465 |
| ; CHECK-NEXT: mov w8, #117 // =0x75 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #54, .LBB37_170 |
| ; CHECK-NEXT: .LBB37_248: // %cond.load469 |
| ; CHECK-NEXT: mov w8, #118 // =0x76 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #55, .LBB37_171 |
| ; CHECK-NEXT: .LBB37_249: // %cond.load473 |
| ; CHECK-NEXT: mov w8, #119 // =0x77 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #56, .LBB37_172 |
| ; CHECK-NEXT: .LBB37_250: // %cond.load477 |
| ; CHECK-NEXT: mov w8, #120 // =0x78 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #57, .LBB37_173 |
| ; CHECK-NEXT: .LBB37_251: // %cond.load481 |
| ; CHECK-NEXT: mov w8, #121 // =0x79 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #58, .LBB37_174 |
| ; CHECK-NEXT: .LBB37_252: // %cond.load485 |
| ; CHECK-NEXT: mov w8, #122 // =0x7a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #59, .LBB37_175 |
| ; CHECK-NEXT: .LBB37_253: // %cond.load489 |
| ; CHECK-NEXT: mov w8, #123 // =0x7b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #60, .LBB37_176 |
| ; CHECK-NEXT: .LBB37_254: // %cond.load493 |
| ; CHECK-NEXT: mov w8, #124 // =0x7c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #61, .LBB37_177 |
| ; CHECK-NEXT: .LBB37_255: // %cond.load497 |
| ; CHECK-NEXT: mov w8, #125 // =0x7d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #62, .LBB37_178 |
| ; CHECK-NEXT: .LBB37_256: // %cond.load501 |
| ; CHECK-NEXT: mov w8, #126 // =0x7e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbnz x9, #63, .LBB37_179 |
| ; CHECK-NEXT: b .LBB37_180 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v128i8i16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl128 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h |
| ; CHECK-EXPAND-NEXT: whilelo p2.h, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sb { z0.h }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <128 x i8>, ptr %bp |
| %mask = icmp eq <128 x i8> %b, zeroinitializer |
| %load = call <128 x i8> @llvm.masked.expandload.v128i8(ptr %ap, <128 x i1> %mask, <128 x i8> poison) |
| %ext = sext <128 x i8> %load to <128 x i16> |
| store <128 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v64i8i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v64i8i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.b, vl64 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w11, v0.b[1] |
| ; CHECK-NEXT: fmov w22, s0 |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: umov w13, v0.b[3] |
| ; CHECK-NEXT: umov w14, v0.b[7] |
| ; CHECK-NEXT: umov w1, v0.b[8] |
| ; CHECK-NEXT: umov w16, v0.b[9] |
| ; CHECK-NEXT: mov z3.b, z0.b[18] |
| ; CHECK-NEXT: mov z5.b, z0.b[19] |
| ; CHECK-NEXT: and x22, x22, #0x1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: umov w17, v0.b[10] |
| ; CHECK-NEXT: bfi x22, x11, #1, #1 |
| ; CHECK-NEXT: mov z6.b, z0.b[20] |
| ; CHECK-NEXT: umov w3, v0.b[11] |
| ; CHECK-NEXT: mov z4.b, z0.b[21] |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: mov z7.b, z0.b[22] |
| ; CHECK-NEXT: bfi x22, x12, #2, #1 |
| ; CHECK-NEXT: fmov w19, s3 |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: ubfiz x14, x14, #7, #1 |
| ; CHECK-NEXT: ubfiz x1, x1, #8, #1 |
| ; CHECK-NEXT: umov w4, v0.b[12] |
| ; CHECK-NEXT: bfi x22, x13, #3, #1 |
| ; CHECK-NEXT: mov z16.b, z0.b[23] |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: ubfiz x16, x16, #9, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: umov w5, v0.b[13] |
| ; CHECK-NEXT: mov z17.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s4 |
| ; CHECK-NEXT: orr x14, x14, x1 |
| ; CHECK-NEXT: bfi x22, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x10, x17, #10, #1 |
| ; CHECK-NEXT: mov z18.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s7 |
| ; CHECK-NEXT: ubfiz x13, x19, #18, #1 |
| ; CHECK-NEXT: ubfiz x19, x20, #19, #1 |
| ; CHECK-NEXT: orr x14, x14, x16 |
| ; CHECK-NEXT: ubfiz x16, x3, #11, #1 |
| ; CHECK-NEXT: umov w15, v0.b[14] |
| ; CHECK-NEXT: mov z19.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s16 |
| ; CHECK-NEXT: ubfiz x1, x21, #20, #1 |
| ; CHECK-NEXT: orr x10, x14, x10 |
| ; CHECK-NEXT: bfi x22, x9, #5, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s17 |
| ; CHECK-NEXT: orr x13, x13, x19 |
| ; CHECK-NEXT: ubfiz x9, x4, #12, #1 |
| ; CHECK-NEXT: orr x10, x10, x16 |
| ; CHECK-NEXT: ubfiz x16, x23, #21, #1 |
| ; CHECK-NEXT: umov w18, v0.b[15] |
| ; CHECK-NEXT: mov z1.b, z0.b[16] |
| ; CHECK-NEXT: mov z21.b, z0.b[28] |
| ; CHECK-NEXT: fmov w11, s18 |
| ; CHECK-NEXT: orr x13, x13, x1 |
| ; CHECK-NEXT: ubfiz x14, x5, #13, #1 |
| ; CHECK-NEXT: bfi x22, x8, #6, #1 |
| ; CHECK-NEXT: ubfiz x8, x24, #22, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[17] |
| ; CHECK-NEXT: mov z22.b, z0.b[29] |
| ; CHECK-NEXT: fmov w27, s19 |
| ; CHECK-NEXT: orr x9, x10, x9 |
| ; CHECK-NEXT: orr x10, x13, x16 |
| ; CHECK-NEXT: ubfiz x13, x25, #23, #1 |
| ; CHECK-NEXT: mov z5.b, z0.b[30] |
| ; CHECK-NEXT: fmov w28, s20 |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: orr x8, x10, x8 |
| ; CHECK-NEXT: ubfiz x10, x15, #14, #1 |
| ; CHECK-NEXT: ubfiz x14, x26, #24, #1 |
| ; CHECK-NEXT: fmov w6, s1 |
| ; CHECK-NEXT: fmov w29, s21 |
| ; CHECK-NEXT: orr x8, x8, x13 |
| ; CHECK-NEXT: ubfiz x11, x11, #25, #1 |
| ; CHECK-NEXT: fmov w7, s2 |
| ; CHECK-NEXT: fmov w30, s22 |
| ; CHECK-NEXT: ubfiz x13, x18, #15, #1 |
| ; CHECK-NEXT: orr x9, x9, x10 |
| ; CHECK-NEXT: orr x8, x8, x14 |
| ; CHECK-NEXT: ubfiz x10, x27, #26, #1 |
| ; CHECK-NEXT: fmov w12, s5 |
| ; CHECK-NEXT: orr x8, x8, x11 |
| ; CHECK-NEXT: ubfiz x11, x28, #27, #1 |
| ; CHECK-NEXT: mov z3.b, z0.b[31] |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: ubfiz x10, x6, #16, #1 |
| ; CHECK-NEXT: ubfiz x13, x29, #28, #1 |
| ; CHECK-NEXT: orr x8, x8, x11 |
| ; CHECK-NEXT: ubfiz x11, x7, #17, #1 |
| ; CHECK-NEXT: ubfiz x14, x30, #29, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[32] |
| ; CHECK-NEXT: orr x9, x9, x10 |
| ; CHECK-NEXT: orr x8, x8, x13 |
| ; CHECK-NEXT: ubfiz x10, x12, #30, #1 |
| ; CHECK-NEXT: fmov w12, s3 |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: orr x8, x8, x14 |
| ; CHECK-NEXT: mov z1.b, z0.b[33] |
| ; CHECK-NEXT: orr x9, x22, x9 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: orr x8, x9, x8 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: lsl w10, w12, #31 |
| ; CHECK-NEXT: mov z2.b, z0.b[34] |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[62] |
| ; CHECK-NEXT: mov z0.b, z0.b[63] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB38_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB38_3 |
| ; CHECK-NEXT: b .LBB38_4 |
| ; CHECK-NEXT: .LBB38_2: |
| ; CHECK-NEXT: adrp x9, .LCPI38_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI38_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB38_4 |
| ; CHECK-NEXT: .LBB38_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB38_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB38_68 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB38_69 |
| ; CHECK-NEXT: .LBB38_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB38_70 |
| ; CHECK-NEXT: .LBB38_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB38_71 |
| ; CHECK-NEXT: .LBB38_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB38_72 |
| ; CHECK-NEXT: .LBB38_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB38_73 |
| ; CHECK-NEXT: .LBB38_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB38_74 |
| ; CHECK-NEXT: .LBB38_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB38_75 |
| ; CHECK-NEXT: .LBB38_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB38_76 |
| ; CHECK-NEXT: .LBB38_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB38_77 |
| ; CHECK-NEXT: .LBB38_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB38_78 |
| ; CHECK-NEXT: .LBB38_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB38_79 |
| ; CHECK-NEXT: .LBB38_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB38_80 |
| ; CHECK-NEXT: .LBB38_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB38_81 |
| ; CHECK-NEXT: .LBB38_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB38_82 |
| ; CHECK-NEXT: .LBB38_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB38_83 |
| ; CHECK-NEXT: .LBB38_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB38_84 |
| ; CHECK-NEXT: .LBB38_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB38_85 |
| ; CHECK-NEXT: .LBB38_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB38_86 |
| ; CHECK-NEXT: .LBB38_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB38_87 |
| ; CHECK-NEXT: .LBB38_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB38_88 |
| ; CHECK-NEXT: .LBB38_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB38_89 |
| ; CHECK-NEXT: .LBB38_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB38_90 |
| ; CHECK-NEXT: .LBB38_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB38_91 |
| ; CHECK-NEXT: .LBB38_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB38_92 |
| ; CHECK-NEXT: .LBB38_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB38_93 |
| ; CHECK-NEXT: .LBB38_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB38_94 |
| ; CHECK-NEXT: .LBB38_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB38_95 |
| ; CHECK-NEXT: .LBB38_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB38_96 |
| ; CHECK-NEXT: .LBB38_33: // %else118 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB38_97 |
| ; CHECK-NEXT: .LBB38_34: // %else122 |
| ; CHECK-NEXT: tbnz x8, #32, .LBB38_98 |
| ; CHECK-NEXT: .LBB38_35: // %else126 |
| ; CHECK-NEXT: tbnz x8, #33, .LBB38_99 |
| ; CHECK-NEXT: .LBB38_36: // %else130 |
| ; CHECK-NEXT: tbnz x8, #34, .LBB38_100 |
| ; CHECK-NEXT: .LBB38_37: // %else134 |
| ; CHECK-NEXT: tbnz x8, #35, .LBB38_101 |
| ; CHECK-NEXT: .LBB38_38: // %else138 |
| ; CHECK-NEXT: tbnz x8, #36, .LBB38_102 |
| ; CHECK-NEXT: .LBB38_39: // %else142 |
| ; CHECK-NEXT: tbnz x8, #37, .LBB38_103 |
| ; CHECK-NEXT: .LBB38_40: // %else146 |
| ; CHECK-NEXT: tbnz x8, #38, .LBB38_104 |
| ; CHECK-NEXT: .LBB38_41: // %else150 |
| ; CHECK-NEXT: tbnz x8, #39, .LBB38_105 |
| ; CHECK-NEXT: .LBB38_42: // %else154 |
| ; CHECK-NEXT: tbnz x8, #40, .LBB38_106 |
| ; CHECK-NEXT: .LBB38_43: // %else158 |
| ; CHECK-NEXT: tbnz x8, #41, .LBB38_107 |
| ; CHECK-NEXT: .LBB38_44: // %else162 |
| ; CHECK-NEXT: tbnz x8, #42, .LBB38_108 |
| ; CHECK-NEXT: .LBB38_45: // %else166 |
| ; CHECK-NEXT: tbnz x8, #43, .LBB38_109 |
| ; CHECK-NEXT: .LBB38_46: // %else170 |
| ; CHECK-NEXT: tbnz x8, #44, .LBB38_110 |
| ; CHECK-NEXT: .LBB38_47: // %else174 |
| ; CHECK-NEXT: tbnz x8, #45, .LBB38_111 |
| ; CHECK-NEXT: .LBB38_48: // %else178 |
| ; CHECK-NEXT: tbnz x8, #46, .LBB38_112 |
| ; CHECK-NEXT: .LBB38_49: // %else182 |
| ; CHECK-NEXT: tbnz x8, #47, .LBB38_113 |
| ; CHECK-NEXT: .LBB38_50: // %else186 |
| ; CHECK-NEXT: tbnz x8, #48, .LBB38_114 |
| ; CHECK-NEXT: .LBB38_51: // %else190 |
| ; CHECK-NEXT: tbnz x8, #49, .LBB38_115 |
| ; CHECK-NEXT: .LBB38_52: // %else194 |
| ; CHECK-NEXT: tbnz x8, #50, .LBB38_116 |
| ; CHECK-NEXT: .LBB38_53: // %else198 |
| ; CHECK-NEXT: tbnz x8, #51, .LBB38_117 |
| ; CHECK-NEXT: .LBB38_54: // %else202 |
| ; CHECK-NEXT: tbnz x8, #52, .LBB38_118 |
| ; CHECK-NEXT: .LBB38_55: // %else206 |
| ; CHECK-NEXT: tbnz x8, #53, .LBB38_119 |
| ; CHECK-NEXT: .LBB38_56: // %else210 |
| ; CHECK-NEXT: tbnz x8, #54, .LBB38_120 |
| ; CHECK-NEXT: .LBB38_57: // %else214 |
| ; CHECK-NEXT: tbnz x8, #55, .LBB38_121 |
| ; CHECK-NEXT: .LBB38_58: // %else218 |
| ; CHECK-NEXT: tbnz x8, #56, .LBB38_122 |
| ; CHECK-NEXT: .LBB38_59: // %else222 |
| ; CHECK-NEXT: tbnz x8, #57, .LBB38_123 |
| ; CHECK-NEXT: .LBB38_60: // %else226 |
| ; CHECK-NEXT: tbnz x8, #58, .LBB38_124 |
| ; CHECK-NEXT: .LBB38_61: // %else230 |
| ; CHECK-NEXT: tbnz x8, #59, .LBB38_125 |
| ; CHECK-NEXT: .LBB38_62: // %else234 |
| ; CHECK-NEXT: tbnz x8, #60, .LBB38_126 |
| ; CHECK-NEXT: .LBB38_63: // %else238 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB38_127 |
| ; CHECK-NEXT: .LBB38_64: // %else242 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB38_128 |
| ; CHECK-NEXT: .LBB38_65: // %else246 |
| ; CHECK-NEXT: tbz x8, #63, .LBB38_67 |
| ; CHECK-NEXT: .LBB38_66: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB38_67: // %else250 |
| ; CHECK-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.s, vl64 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB38_68: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB38_6 |
| ; CHECK-NEXT: .LBB38_69: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB38_7 |
| ; CHECK-NEXT: .LBB38_70: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB38_8 |
| ; CHECK-NEXT: .LBB38_71: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB38_9 |
| ; CHECK-NEXT: .LBB38_72: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB38_10 |
| ; CHECK-NEXT: .LBB38_73: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB38_11 |
| ; CHECK-NEXT: .LBB38_74: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB38_12 |
| ; CHECK-NEXT: .LBB38_75: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB38_13 |
| ; CHECK-NEXT: .LBB38_76: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB38_14 |
| ; CHECK-NEXT: .LBB38_77: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB38_15 |
| ; CHECK-NEXT: .LBB38_78: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB38_16 |
| ; CHECK-NEXT: .LBB38_79: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB38_17 |
| ; CHECK-NEXT: .LBB38_80: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB38_18 |
| ; CHECK-NEXT: .LBB38_81: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB38_19 |
| ; CHECK-NEXT: .LBB38_82: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB38_20 |
| ; CHECK-NEXT: .LBB38_83: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB38_21 |
| ; CHECK-NEXT: .LBB38_84: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB38_22 |
| ; CHECK-NEXT: .LBB38_85: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB38_23 |
| ; CHECK-NEXT: .LBB38_86: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB38_24 |
| ; CHECK-NEXT: .LBB38_87: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB38_25 |
| ; CHECK-NEXT: .LBB38_88: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB38_26 |
| ; CHECK-NEXT: .LBB38_89: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB38_27 |
| ; CHECK-NEXT: .LBB38_90: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB38_28 |
| ; CHECK-NEXT: .LBB38_91: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB38_29 |
| ; CHECK-NEXT: .LBB38_92: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB38_30 |
| ; CHECK-NEXT: .LBB38_93: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB38_31 |
| ; CHECK-NEXT: .LBB38_94: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB38_32 |
| ; CHECK-NEXT: .LBB38_95: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB38_33 |
| ; CHECK-NEXT: .LBB38_96: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #31, .LBB38_34 |
| ; CHECK-NEXT: .LBB38_97: // %cond.load121 |
| ; CHECK-NEXT: mov w9, #31 // =0x1f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #32, .LBB38_35 |
| ; CHECK-NEXT: .LBB38_98: // %cond.load125 |
| ; CHECK-NEXT: mov w9, #32 // =0x20 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #33, .LBB38_36 |
| ; CHECK-NEXT: .LBB38_99: // %cond.load129 |
| ; CHECK-NEXT: mov w9, #33 // =0x21 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #34, .LBB38_37 |
| ; CHECK-NEXT: .LBB38_100: // %cond.load133 |
| ; CHECK-NEXT: mov w9, #34 // =0x22 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #35, .LBB38_38 |
| ; CHECK-NEXT: .LBB38_101: // %cond.load137 |
| ; CHECK-NEXT: mov w9, #35 // =0x23 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #36, .LBB38_39 |
| ; CHECK-NEXT: .LBB38_102: // %cond.load141 |
| ; CHECK-NEXT: mov w9, #36 // =0x24 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #37, .LBB38_40 |
| ; CHECK-NEXT: .LBB38_103: // %cond.load145 |
| ; CHECK-NEXT: mov w9, #37 // =0x25 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #38, .LBB38_41 |
| ; CHECK-NEXT: .LBB38_104: // %cond.load149 |
| ; CHECK-NEXT: mov w9, #38 // =0x26 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #39, .LBB38_42 |
| ; CHECK-NEXT: .LBB38_105: // %cond.load153 |
| ; CHECK-NEXT: mov w9, #39 // =0x27 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #40, .LBB38_43 |
| ; CHECK-NEXT: .LBB38_106: // %cond.load157 |
| ; CHECK-NEXT: mov w9, #40 // =0x28 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #41, .LBB38_44 |
| ; CHECK-NEXT: .LBB38_107: // %cond.load161 |
| ; CHECK-NEXT: mov w9, #41 // =0x29 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #42, .LBB38_45 |
| ; CHECK-NEXT: .LBB38_108: // %cond.load165 |
| ; CHECK-NEXT: mov w9, #42 // =0x2a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #43, .LBB38_46 |
| ; CHECK-NEXT: .LBB38_109: // %cond.load169 |
| ; CHECK-NEXT: mov w9, #43 // =0x2b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #44, .LBB38_47 |
| ; CHECK-NEXT: .LBB38_110: // %cond.load173 |
| ; CHECK-NEXT: mov w9, #44 // =0x2c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #45, .LBB38_48 |
| ; CHECK-NEXT: .LBB38_111: // %cond.load177 |
| ; CHECK-NEXT: mov w9, #45 // =0x2d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #46, .LBB38_49 |
| ; CHECK-NEXT: .LBB38_112: // %cond.load181 |
| ; CHECK-NEXT: mov w9, #46 // =0x2e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #47, .LBB38_50 |
| ; CHECK-NEXT: .LBB38_113: // %cond.load185 |
| ; CHECK-NEXT: mov w9, #47 // =0x2f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #48, .LBB38_51 |
| ; CHECK-NEXT: .LBB38_114: // %cond.load189 |
| ; CHECK-NEXT: mov w9, #48 // =0x30 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #49, .LBB38_52 |
| ; CHECK-NEXT: .LBB38_115: // %cond.load193 |
| ; CHECK-NEXT: mov w9, #49 // =0x31 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #50, .LBB38_53 |
| ; CHECK-NEXT: .LBB38_116: // %cond.load197 |
| ; CHECK-NEXT: mov w9, #50 // =0x32 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #51, .LBB38_54 |
| ; CHECK-NEXT: .LBB38_117: // %cond.load201 |
| ; CHECK-NEXT: mov w9, #51 // =0x33 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #52, .LBB38_55 |
| ; CHECK-NEXT: .LBB38_118: // %cond.load205 |
| ; CHECK-NEXT: mov w9, #52 // =0x34 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #53, .LBB38_56 |
| ; CHECK-NEXT: .LBB38_119: // %cond.load209 |
| ; CHECK-NEXT: mov w9, #53 // =0x35 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #54, .LBB38_57 |
| ; CHECK-NEXT: .LBB38_120: // %cond.load213 |
| ; CHECK-NEXT: mov w9, #54 // =0x36 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #55, .LBB38_58 |
| ; CHECK-NEXT: .LBB38_121: // %cond.load217 |
| ; CHECK-NEXT: mov w9, #55 // =0x37 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #56, .LBB38_59 |
| ; CHECK-NEXT: .LBB38_122: // %cond.load221 |
| ; CHECK-NEXT: mov w9, #56 // =0x38 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #57, .LBB38_60 |
| ; CHECK-NEXT: .LBB38_123: // %cond.load225 |
| ; CHECK-NEXT: mov w9, #57 // =0x39 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #58, .LBB38_61 |
| ; CHECK-NEXT: .LBB38_124: // %cond.load229 |
| ; CHECK-NEXT: mov w9, #58 // =0x3a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #59, .LBB38_62 |
| ; CHECK-NEXT: .LBB38_125: // %cond.load233 |
| ; CHECK-NEXT: mov w9, #59 // =0x3b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #60, .LBB38_63 |
| ; CHECK-NEXT: .LBB38_126: // %cond.load237 |
| ; CHECK-NEXT: mov w9, #60 // =0x3c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #61, .LBB38_64 |
| ; CHECK-NEXT: .LBB38_127: // %cond.load241 |
| ; CHECK-NEXT: mov w9, #61 // =0x3d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #62, .LBB38_65 |
| ; CHECK-NEXT: .LBB38_128: // %cond.load245 |
| ; CHECK-NEXT: mov w9, #62 // =0x3e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB38_66 |
| ; CHECK-NEXT: b .LBB38_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v64i8i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl64 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sb { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <64 x i8>, ptr %bp |
| %mask = icmp eq <64 x i8> %b, zeroinitializer |
| %load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison) |
| %ext = sext <64 x i8> %load to <64 x i32> |
| store <64 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v32i8i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v32i8i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.b, vl32 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w13, v0.b[1] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w4, v0.b[7] |
| ; CHECK-NEXT: umov w5, v0.b[8] |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: umov w3, v0.b[9] |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: umov w1, v0.b[10] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: bfi w6, w13, #1, #1 |
| ; CHECK-NEXT: umov w18, v0.b[11] |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: ubfiz w13, w4, #7, #1 |
| ; CHECK-NEXT: ubfiz w4, w5, #8, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: bfi w6, w12, #2, #1 |
| ; CHECK-NEXT: umov w16, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: orr w12, w13, w4 |
| ; CHECK-NEXT: ubfiz w13, w3, #9, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w17, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: ubfiz w11, w1, #10, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: ubfiz w3, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w4, w21, #19, #1 |
| ; CHECK-NEXT: orr w12, w12, w13 |
| ; CHECK-NEXT: ubfiz w13, w18, #11, #1 |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: ubfiz w1, w22, #20, #1 |
| ; CHECK-NEXT: orr w11, w12, w11 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w3, w3, w4 |
| ; CHECK-NEXT: orr w11, w11, w13 |
| ; CHECK-NEXT: ubfiz w12, w16, #12, #1 |
| ; CHECK-NEXT: ubfiz w13, w23, #21, #1 |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: orr w10, w3, w1 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: ubfiz w9, w17, #13, #1 |
| ; CHECK-NEXT: ubfiz w16, w24, #22, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w5, s21 |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: ubfiz w12, w25, #23, #1 |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: orr w9, w11, w9 |
| ; CHECK-NEXT: orr w10, w10, w16 |
| ; CHECK-NEXT: ubfiz w11, w26, #24, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: fmov w28, s22 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w14, #14, #1 |
| ; CHECK-NEXT: ubfiz w13, w27, #25, #1 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w29, s23 |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: ubfiz w14, w5, #26, #1 |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: fmov w30, s24 |
| ; CHECK-NEXT: ubfiz w11, w15, #15, #1 |
| ; CHECK-NEXT: bfi w6, w8, #6, #1 |
| ; CHECK-NEXT: orr w8, w9, w12 |
| ; CHECK-NEXT: orr w9, w10, w13 |
| ; CHECK-NEXT: orr w9, w9, w14 |
| ; CHECK-NEXT: ubfiz w10, w28, #27, #1 |
| ; CHECK-NEXT: fmov w14, s2 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: ubfiz w11, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w13, w29, #28, #1 |
| ; CHECK-NEXT: ubfiz w12, w19, #17, #1 |
| ; CHECK-NEXT: orr w9, w9, w10 |
| ; CHECK-NEXT: ubfiz w10, w30, #29, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w9, w9, w13 |
| ; CHECK-NEXT: ubfiz w11, w14, #30, #1 |
| ; CHECK-NEXT: orr w8, w8, w12 |
| ; CHECK-NEXT: orr w9, w9, w10 |
| ; CHECK-NEXT: orr w8, w6, w8 |
| ; CHECK-NEXT: orr w9, w9, w11 |
| ; CHECK-NEXT: orr w8, w8, w9 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB39_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB39_3 |
| ; CHECK-NEXT: b .LBB39_4 |
| ; CHECK-NEXT: .LBB39_2: |
| ; CHECK-NEXT: adrp x9, .LCPI39_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI39_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB39_4 |
| ; CHECK-NEXT: .LBB39_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB39_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB39_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB39_37 |
| ; CHECK-NEXT: .LBB39_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB39_38 |
| ; CHECK-NEXT: .LBB39_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB39_39 |
| ; CHECK-NEXT: .LBB39_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB39_40 |
| ; CHECK-NEXT: .LBB39_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB39_41 |
| ; CHECK-NEXT: .LBB39_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB39_42 |
| ; CHECK-NEXT: .LBB39_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB39_43 |
| ; CHECK-NEXT: .LBB39_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB39_44 |
| ; CHECK-NEXT: .LBB39_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB39_45 |
| ; CHECK-NEXT: .LBB39_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB39_46 |
| ; CHECK-NEXT: .LBB39_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB39_47 |
| ; CHECK-NEXT: .LBB39_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB39_48 |
| ; CHECK-NEXT: .LBB39_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB39_49 |
| ; CHECK-NEXT: .LBB39_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB39_50 |
| ; CHECK-NEXT: .LBB39_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB39_51 |
| ; CHECK-NEXT: .LBB39_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB39_52 |
| ; CHECK-NEXT: .LBB39_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB39_53 |
| ; CHECK-NEXT: .LBB39_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB39_54 |
| ; CHECK-NEXT: .LBB39_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB39_55 |
| ; CHECK-NEXT: .LBB39_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB39_56 |
| ; CHECK-NEXT: .LBB39_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB39_57 |
| ; CHECK-NEXT: .LBB39_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB39_58 |
| ; CHECK-NEXT: .LBB39_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB39_59 |
| ; CHECK-NEXT: .LBB39_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB39_60 |
| ; CHECK-NEXT: .LBB39_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB39_61 |
| ; CHECK-NEXT: .LBB39_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB39_62 |
| ; CHECK-NEXT: .LBB39_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB39_63 |
| ; CHECK-NEXT: .LBB39_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB39_64 |
| ; CHECK-NEXT: .LBB39_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB39_35 |
| ; CHECK-NEXT: .LBB39_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB39_35: // %else122 |
| ; CHECK-NEXT: sunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB39_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB39_6 |
| ; CHECK-NEXT: .LBB39_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB39_7 |
| ; CHECK-NEXT: .LBB39_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB39_8 |
| ; CHECK-NEXT: .LBB39_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB39_9 |
| ; CHECK-NEXT: .LBB39_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB39_10 |
| ; CHECK-NEXT: .LBB39_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB39_11 |
| ; CHECK-NEXT: .LBB39_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB39_12 |
| ; CHECK-NEXT: .LBB39_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB39_13 |
| ; CHECK-NEXT: .LBB39_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB39_14 |
| ; CHECK-NEXT: .LBB39_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB39_15 |
| ; CHECK-NEXT: .LBB39_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB39_16 |
| ; CHECK-NEXT: .LBB39_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB39_17 |
| ; CHECK-NEXT: .LBB39_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB39_18 |
| ; CHECK-NEXT: .LBB39_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB39_19 |
| ; CHECK-NEXT: .LBB39_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB39_20 |
| ; CHECK-NEXT: .LBB39_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB39_21 |
| ; CHECK-NEXT: .LBB39_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB39_22 |
| ; CHECK-NEXT: .LBB39_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB39_23 |
| ; CHECK-NEXT: .LBB39_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB39_24 |
| ; CHECK-NEXT: .LBB39_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB39_25 |
| ; CHECK-NEXT: .LBB39_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB39_26 |
| ; CHECK-NEXT: .LBB39_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB39_27 |
| ; CHECK-NEXT: .LBB39_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB39_28 |
| ; CHECK-NEXT: .LBB39_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB39_29 |
| ; CHECK-NEXT: .LBB39_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB39_30 |
| ; CHECK-NEXT: .LBB39_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB39_31 |
| ; CHECK-NEXT: .LBB39_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB39_32 |
| ; CHECK-NEXT: .LBB39_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB39_33 |
| ; CHECK-NEXT: .LBB39_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB39_34 |
| ; CHECK-NEXT: b .LBB39_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sb { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i8>, ptr %bp |
| %mask = icmp eq <32 x i8> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = sext <32 x i8> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v64i16i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v64i16i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.h, vl64 |
| ; CHECK-NEXT: str x2, [sp] // 8-byte Spill |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w12, v0.b[1] |
| ; CHECK-NEXT: fmov w25, s0 |
| ; CHECK-NEXT: mov z3.b, z0.b[18] |
| ; CHECK-NEXT: mov z4.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[2] |
| ; CHECK-NEXT: umov w14, v0.b[7] |
| ; CHECK-NEXT: umov w3, v0.b[8] |
| ; CHECK-NEXT: mov z5.b, z0.b[20] |
| ; CHECK-NEXT: umov w4, v0.b[9] |
| ; CHECK-NEXT: mov z6.b, z0.b[21] |
| ; CHECK-NEXT: and x25, x25, #0x1 |
| ; CHECK-NEXT: umov w5, v0.b[10] |
| ; CHECK-NEXT: mov z7.b, z0.b[22] |
| ; CHECK-NEXT: fmov w19, s3 |
| ; CHECK-NEXT: fmov w20, s4 |
| ; CHECK-NEXT: bfi x25, x12, #1, #1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: mov z16.b, z0.b[23] |
| ; CHECK-NEXT: fmov w21, s5 |
| ; CHECK-NEXT: umov w15, v0.b[11] |
| ; CHECK-NEXT: fmov w22, s6 |
| ; CHECK-NEXT: bfi x25, x13, #2, #1 |
| ; CHECK-NEXT: ubfiz x13, x14, #7, #1 |
| ; CHECK-NEXT: ubfiz x14, x3, #8, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: umov w17, v0.b[12] |
| ; CHECK-NEXT: mov z17.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s7 |
| ; CHECK-NEXT: ubfiz x3, x4, #9, #1 |
| ; CHECK-NEXT: ubfiz x4, x19, #18, #1 |
| ; CHECK-NEXT: ubfiz x19, x20, #19, #1 |
| ; CHECK-NEXT: umov w18, v0.b[13] |
| ; CHECK-NEXT: mov z18.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s16 |
| ; CHECK-NEXT: orr x13, x13, x14 |
| ; CHECK-NEXT: ubfiz x14, x5, #10, #1 |
| ; CHECK-NEXT: ubfiz x5, x21, #20, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w16, v0.b[14] |
| ; CHECK-NEXT: mov z19.b, z0.b[26] |
| ; CHECK-NEXT: orr x4, x4, x19 |
| ; CHECK-NEXT: orr x13, x13, x3 |
| ; CHECK-NEXT: ubfiz x3, x22, #21, #1 |
| ; CHECK-NEXT: bfi x25, x11, #3, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s17 |
| ; CHECK-NEXT: orr x11, x13, x14 |
| ; CHECK-NEXT: orr x13, x4, x5 |
| ; CHECK-NEXT: ubfiz x14, x15, #11, #1 |
| ; CHECK-NEXT: ubfiz x15, x23, #22, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[16] |
| ; CHECK-NEXT: mov z21.b, z0.b[28] |
| ; CHECK-NEXT: fmov w27, s18 |
| ; CHECK-NEXT: orr x13, x13, x3 |
| ; CHECK-NEXT: bfi x25, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x10, x17, #12, #1 |
| ; CHECK-NEXT: ubfiz x17, x24, #23, #1 |
| ; CHECK-NEXT: umov w1, v0.b[15] |
| ; CHECK-NEXT: mov z2.b, z0.b[17] |
| ; CHECK-NEXT: mov z4.b, z0.b[29] |
| ; CHECK-NEXT: fmov w28, s19 |
| ; CHECK-NEXT: orr x11, x11, x14 |
| ; CHECK-NEXT: orr x13, x13, x15 |
| ; CHECK-NEXT: ubfiz x14, x18, #13, #1 |
| ; CHECK-NEXT: mov z5.b, z0.b[30] |
| ; CHECK-NEXT: fmov w29, s20 |
| ; CHECK-NEXT: orr x10, x11, x10 |
| ; CHECK-NEXT: bfi x25, x9, #5, #1 |
| ; CHECK-NEXT: orr x9, x13, x17 |
| ; CHECK-NEXT: ubfiz x11, x16, #14, #1 |
| ; CHECK-NEXT: ubfiz x13, x26, #24, #1 |
| ; CHECK-NEXT: fmov w6, s1 |
| ; CHECK-NEXT: fmov w12, s21 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: ubfiz x15, x27, #25, #1 |
| ; CHECK-NEXT: umov w2, v0.b[6] |
| ; CHECK-NEXT: fmov w7, s2 |
| ; CHECK-NEXT: fmov w30, s4 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x11, x28, #26, #1 |
| ; CHECK-NEXT: fmov w8, s5 |
| ; CHECK-NEXT: ubfiz x14, x1, #15, #1 |
| ; CHECK-NEXT: orr x9, x9, x15 |
| ; CHECK-NEXT: ubfiz x13, x29, #27, #1 |
| ; CHECK-NEXT: mov z3.b, z0.b[31] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x11, x6, #16, #1 |
| ; CHECK-NEXT: ubfiz x12, x12, #28, #1 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x13, x7, #17, #1 |
| ; CHECK-NEXT: ubfiz x14, x30, #29, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[32] |
| ; CHECK-NEXT: bfi x25, x2, #6, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: ubfiz x8, x8, #30, #1 |
| ; CHECK-NEXT: fmov w11, s3 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: mov z1.b, z0.b[33] |
| ; CHECK-NEXT: orr x10, x25, x10 |
| ; CHECK-NEXT: orr x8, x9, x8 |
| ; CHECK-NEXT: orr x8, x10, x8 |
| ; CHECK-NEXT: fmov w10, s2 |
| ; CHECK-NEXT: lsl w9, w11, #31 |
| ; CHECK-NEXT: mov z2.b, z0.b[34] |
| ; CHECK-NEXT: orr x8, x8, x9 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[62] |
| ; CHECK-NEXT: mov z0.b, z0.b[63] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB40_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #2 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB40_3 |
| ; CHECK-NEXT: b .LBB40_4 |
| ; CHECK-NEXT: .LBB40_2: |
| ; CHECK-NEXT: adrp x9, .LCPI40_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI40_0 |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB40_4 |
| ; CHECK-NEXT: .LBB40_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: .LBB40_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB40_68 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB40_69 |
| ; CHECK-NEXT: .LBB40_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB40_70 |
| ; CHECK-NEXT: .LBB40_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB40_71 |
| ; CHECK-NEXT: .LBB40_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB40_72 |
| ; CHECK-NEXT: .LBB40_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB40_73 |
| ; CHECK-NEXT: .LBB40_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB40_74 |
| ; CHECK-NEXT: .LBB40_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB40_75 |
| ; CHECK-NEXT: .LBB40_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB40_76 |
| ; CHECK-NEXT: .LBB40_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB40_77 |
| ; CHECK-NEXT: .LBB40_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB40_78 |
| ; CHECK-NEXT: .LBB40_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB40_79 |
| ; CHECK-NEXT: .LBB40_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB40_80 |
| ; CHECK-NEXT: .LBB40_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB40_81 |
| ; CHECK-NEXT: .LBB40_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB40_82 |
| ; CHECK-NEXT: .LBB40_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB40_83 |
| ; CHECK-NEXT: .LBB40_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB40_84 |
| ; CHECK-NEXT: .LBB40_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB40_85 |
| ; CHECK-NEXT: .LBB40_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB40_86 |
| ; CHECK-NEXT: .LBB40_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB40_87 |
| ; CHECK-NEXT: .LBB40_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB40_88 |
| ; CHECK-NEXT: .LBB40_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB40_89 |
| ; CHECK-NEXT: .LBB40_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB40_90 |
| ; CHECK-NEXT: .LBB40_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB40_91 |
| ; CHECK-NEXT: .LBB40_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB40_92 |
| ; CHECK-NEXT: .LBB40_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB40_93 |
| ; CHECK-NEXT: .LBB40_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB40_94 |
| ; CHECK-NEXT: .LBB40_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB40_95 |
| ; CHECK-NEXT: .LBB40_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB40_96 |
| ; CHECK-NEXT: .LBB40_33: // %else118 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB40_97 |
| ; CHECK-NEXT: .LBB40_34: // %else122 |
| ; CHECK-NEXT: tbnz x8, #32, .LBB40_98 |
| ; CHECK-NEXT: .LBB40_35: // %else126 |
| ; CHECK-NEXT: tbnz x8, #33, .LBB40_99 |
| ; CHECK-NEXT: .LBB40_36: // %else130 |
| ; CHECK-NEXT: tbnz x8, #34, .LBB40_100 |
| ; CHECK-NEXT: .LBB40_37: // %else134 |
| ; CHECK-NEXT: tbnz x8, #35, .LBB40_101 |
| ; CHECK-NEXT: .LBB40_38: // %else138 |
| ; CHECK-NEXT: tbnz x8, #36, .LBB40_102 |
| ; CHECK-NEXT: .LBB40_39: // %else142 |
| ; CHECK-NEXT: tbnz x8, #37, .LBB40_103 |
| ; CHECK-NEXT: .LBB40_40: // %else146 |
| ; CHECK-NEXT: tbnz x8, #38, .LBB40_104 |
| ; CHECK-NEXT: .LBB40_41: // %else150 |
| ; CHECK-NEXT: tbnz x8, #39, .LBB40_105 |
| ; CHECK-NEXT: .LBB40_42: // %else154 |
| ; CHECK-NEXT: tbnz x8, #40, .LBB40_106 |
| ; CHECK-NEXT: .LBB40_43: // %else158 |
| ; CHECK-NEXT: tbnz x8, #41, .LBB40_107 |
| ; CHECK-NEXT: .LBB40_44: // %else162 |
| ; CHECK-NEXT: tbnz x8, #42, .LBB40_108 |
| ; CHECK-NEXT: .LBB40_45: // %else166 |
| ; CHECK-NEXT: tbnz x8, #43, .LBB40_109 |
| ; CHECK-NEXT: .LBB40_46: // %else170 |
| ; CHECK-NEXT: tbnz x8, #44, .LBB40_110 |
| ; CHECK-NEXT: .LBB40_47: // %else174 |
| ; CHECK-NEXT: tbnz x8, #45, .LBB40_111 |
| ; CHECK-NEXT: .LBB40_48: // %else178 |
| ; CHECK-NEXT: tbnz x8, #46, .LBB40_112 |
| ; CHECK-NEXT: .LBB40_49: // %else182 |
| ; CHECK-NEXT: tbnz x8, #47, .LBB40_113 |
| ; CHECK-NEXT: .LBB40_50: // %else186 |
| ; CHECK-NEXT: tbnz x8, #48, .LBB40_114 |
| ; CHECK-NEXT: .LBB40_51: // %else190 |
| ; CHECK-NEXT: tbnz x8, #49, .LBB40_115 |
| ; CHECK-NEXT: .LBB40_52: // %else194 |
| ; CHECK-NEXT: tbnz x8, #50, .LBB40_116 |
| ; CHECK-NEXT: .LBB40_53: // %else198 |
| ; CHECK-NEXT: tbnz x8, #51, .LBB40_117 |
| ; CHECK-NEXT: .LBB40_54: // %else202 |
| ; CHECK-NEXT: tbnz x8, #52, .LBB40_118 |
| ; CHECK-NEXT: .LBB40_55: // %else206 |
| ; CHECK-NEXT: tbnz x8, #53, .LBB40_119 |
| ; CHECK-NEXT: .LBB40_56: // %else210 |
| ; CHECK-NEXT: tbnz x8, #54, .LBB40_120 |
| ; CHECK-NEXT: .LBB40_57: // %else214 |
| ; CHECK-NEXT: tbnz x8, #55, .LBB40_121 |
| ; CHECK-NEXT: .LBB40_58: // %else218 |
| ; CHECK-NEXT: tbnz x8, #56, .LBB40_122 |
| ; CHECK-NEXT: .LBB40_59: // %else222 |
| ; CHECK-NEXT: tbnz x8, #57, .LBB40_123 |
| ; CHECK-NEXT: .LBB40_60: // %else226 |
| ; CHECK-NEXT: tbnz x8, #58, .LBB40_124 |
| ; CHECK-NEXT: .LBB40_61: // %else230 |
| ; CHECK-NEXT: tbnz x8, #59, .LBB40_125 |
| ; CHECK-NEXT: .LBB40_62: // %else234 |
| ; CHECK-NEXT: tbnz x8, #60, .LBB40_126 |
| ; CHECK-NEXT: .LBB40_63: // %else238 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB40_127 |
| ; CHECK-NEXT: .LBB40_64: // %else242 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB40_128 |
| ; CHECK-NEXT: .LBB40_65: // %else246 |
| ; CHECK-NEXT: tbz x8, #63, .LBB40_67 |
| ; CHECK-NEXT: .LBB40_66: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w8 |
| ; CHECK-NEXT: ldrh w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w8 |
| ; CHECK-NEXT: .LBB40_67: // %else250 |
| ; CHECK-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s, vl64 |
| ; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB40_68: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB40_6 |
| ; CHECK-NEXT: .LBB40_69: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB40_7 |
| ; CHECK-NEXT: .LBB40_70: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB40_8 |
| ; CHECK-NEXT: .LBB40_71: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB40_9 |
| ; CHECK-NEXT: .LBB40_72: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB40_10 |
| ; CHECK-NEXT: .LBB40_73: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB40_11 |
| ; CHECK-NEXT: .LBB40_74: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB40_12 |
| ; CHECK-NEXT: .LBB40_75: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB40_13 |
| ; CHECK-NEXT: .LBB40_76: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB40_14 |
| ; CHECK-NEXT: .LBB40_77: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB40_15 |
| ; CHECK-NEXT: .LBB40_78: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB40_16 |
| ; CHECK-NEXT: .LBB40_79: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB40_17 |
| ; CHECK-NEXT: .LBB40_80: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB40_18 |
| ; CHECK-NEXT: .LBB40_81: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB40_19 |
| ; CHECK-NEXT: .LBB40_82: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB40_20 |
| ; CHECK-NEXT: .LBB40_83: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB40_21 |
| ; CHECK-NEXT: .LBB40_84: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB40_22 |
| ; CHECK-NEXT: .LBB40_85: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB40_23 |
| ; CHECK-NEXT: .LBB40_86: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB40_24 |
| ; CHECK-NEXT: .LBB40_87: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB40_25 |
| ; CHECK-NEXT: .LBB40_88: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB40_26 |
| ; CHECK-NEXT: .LBB40_89: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB40_27 |
| ; CHECK-NEXT: .LBB40_90: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB40_28 |
| ; CHECK-NEXT: .LBB40_91: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB40_29 |
| ; CHECK-NEXT: .LBB40_92: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB40_30 |
| ; CHECK-NEXT: .LBB40_93: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB40_31 |
| ; CHECK-NEXT: .LBB40_94: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB40_32 |
| ; CHECK-NEXT: .LBB40_95: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB40_33 |
| ; CHECK-NEXT: .LBB40_96: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #31, .LBB40_34 |
| ; CHECK-NEXT: .LBB40_97: // %cond.load121 |
| ; CHECK-NEXT: mov w9, #31 // =0x1f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #32, .LBB40_35 |
| ; CHECK-NEXT: .LBB40_98: // %cond.load125 |
| ; CHECK-NEXT: mov w9, #32 // =0x20 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #33, .LBB40_36 |
| ; CHECK-NEXT: .LBB40_99: // %cond.load129 |
| ; CHECK-NEXT: mov w9, #33 // =0x21 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #34, .LBB40_37 |
| ; CHECK-NEXT: .LBB40_100: // %cond.load133 |
| ; CHECK-NEXT: mov w9, #34 // =0x22 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #35, .LBB40_38 |
| ; CHECK-NEXT: .LBB40_101: // %cond.load137 |
| ; CHECK-NEXT: mov w9, #35 // =0x23 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #36, .LBB40_39 |
| ; CHECK-NEXT: .LBB40_102: // %cond.load141 |
| ; CHECK-NEXT: mov w9, #36 // =0x24 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #37, .LBB40_40 |
| ; CHECK-NEXT: .LBB40_103: // %cond.load145 |
| ; CHECK-NEXT: mov w9, #37 // =0x25 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #38, .LBB40_41 |
| ; CHECK-NEXT: .LBB40_104: // %cond.load149 |
| ; CHECK-NEXT: mov w9, #38 // =0x26 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #39, .LBB40_42 |
| ; CHECK-NEXT: .LBB40_105: // %cond.load153 |
| ; CHECK-NEXT: mov w9, #39 // =0x27 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #40, .LBB40_43 |
| ; CHECK-NEXT: .LBB40_106: // %cond.load157 |
| ; CHECK-NEXT: mov w9, #40 // =0x28 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #41, .LBB40_44 |
| ; CHECK-NEXT: .LBB40_107: // %cond.load161 |
| ; CHECK-NEXT: mov w9, #41 // =0x29 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #42, .LBB40_45 |
| ; CHECK-NEXT: .LBB40_108: // %cond.load165 |
| ; CHECK-NEXT: mov w9, #42 // =0x2a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #43, .LBB40_46 |
| ; CHECK-NEXT: .LBB40_109: // %cond.load169 |
| ; CHECK-NEXT: mov w9, #43 // =0x2b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #44, .LBB40_47 |
| ; CHECK-NEXT: .LBB40_110: // %cond.load173 |
| ; CHECK-NEXT: mov w9, #44 // =0x2c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #45, .LBB40_48 |
| ; CHECK-NEXT: .LBB40_111: // %cond.load177 |
| ; CHECK-NEXT: mov w9, #45 // =0x2d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #46, .LBB40_49 |
| ; CHECK-NEXT: .LBB40_112: // %cond.load181 |
| ; CHECK-NEXT: mov w9, #46 // =0x2e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #47, .LBB40_50 |
| ; CHECK-NEXT: .LBB40_113: // %cond.load185 |
| ; CHECK-NEXT: mov w9, #47 // =0x2f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #48, .LBB40_51 |
| ; CHECK-NEXT: .LBB40_114: // %cond.load189 |
| ; CHECK-NEXT: mov w9, #48 // =0x30 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #49, .LBB40_52 |
| ; CHECK-NEXT: .LBB40_115: // %cond.load193 |
| ; CHECK-NEXT: mov w9, #49 // =0x31 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #50, .LBB40_53 |
| ; CHECK-NEXT: .LBB40_116: // %cond.load197 |
| ; CHECK-NEXT: mov w9, #50 // =0x32 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #51, .LBB40_54 |
| ; CHECK-NEXT: .LBB40_117: // %cond.load201 |
| ; CHECK-NEXT: mov w9, #51 // =0x33 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #52, .LBB40_55 |
| ; CHECK-NEXT: .LBB40_118: // %cond.load205 |
| ; CHECK-NEXT: mov w9, #52 // =0x34 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #53, .LBB40_56 |
| ; CHECK-NEXT: .LBB40_119: // %cond.load209 |
| ; CHECK-NEXT: mov w9, #53 // =0x35 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #54, .LBB40_57 |
| ; CHECK-NEXT: .LBB40_120: // %cond.load213 |
| ; CHECK-NEXT: mov w9, #54 // =0x36 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #55, .LBB40_58 |
| ; CHECK-NEXT: .LBB40_121: // %cond.load217 |
| ; CHECK-NEXT: mov w9, #55 // =0x37 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #56, .LBB40_59 |
| ; CHECK-NEXT: .LBB40_122: // %cond.load221 |
| ; CHECK-NEXT: mov w9, #56 // =0x38 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #57, .LBB40_60 |
| ; CHECK-NEXT: .LBB40_123: // %cond.load225 |
| ; CHECK-NEXT: mov w9, #57 // =0x39 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #58, .LBB40_61 |
| ; CHECK-NEXT: .LBB40_124: // %cond.load229 |
| ; CHECK-NEXT: mov w9, #58 // =0x3a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #59, .LBB40_62 |
| ; CHECK-NEXT: .LBB40_125: // %cond.load233 |
| ; CHECK-NEXT: mov w9, #59 // =0x3b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #60, .LBB40_63 |
| ; CHECK-NEXT: .LBB40_126: // %cond.load237 |
| ; CHECK-NEXT: mov w9, #60 // =0x3c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #61, .LBB40_64 |
| ; CHECK-NEXT: .LBB40_127: // %cond.load241 |
| ; CHECK-NEXT: mov w9, #61 // =0x3d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #62, .LBB40_65 |
| ; CHECK-NEXT: .LBB40_128: // %cond.load245 |
| ; CHECK-NEXT: mov w9, #62 // =0x3e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB40_66 |
| ; CHECK-NEXT: b .LBB40_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v64i16i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl64 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sh { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <64 x i16>, ptr %bp |
| %mask = icmp eq <64 x i16> %b, zeroinitializer |
| %load = call <64 x i16> @llvm.masked.expandload.v64i16(ptr %ap, <64 x i1> %mask, <64 x i16> poison) |
| %ext = sext <64 x i16> %load to <64 x i32> |
| store <64 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding masked load of <32 x i16> from %ap whose result is sign-extended to |
| ; <32 x i64> and stored to %c. The mask is formed by comparing the <32 x i16> |
| ; loaded from %bp against zero, and the passthru operand is poison. |
| ; vscale_range(16,0) sets the minimum vscale to 16. |
| ; |
| ; The first group of assertions below expects a scalarized lowering: the 32 mask |
| ; bits are packed into w8, each bit is tested with tbz/tbnz, and every active |
| ; lane is loaded individually from [x0] and inserted into z0.h before the final |
| ; sunpklo/sunpklo/st1d. The second group (the run with -mattr=sve2p2) expects a |
| ; cntp + whilelo + ld1sh + expand sequence instead. |
| define void @masked_load_sext_v32i16i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v32i16i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.h, vl32 |
| ; CHECK-NEXT: str x2, [sp] // 8-byte Spill |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w12, v0.b[1] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w3, v0.b[7] |
| ; CHECK-NEXT: umov w5, v0.b[8] |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[2] |
| ; CHECK-NEXT: umov w4, v0.b[9] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: umov w1, v0.b[10] |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: bfi w6, w12, #1, #1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w16, v0.b[11] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: ubfiz w12, w3, #7, #1 |
| ; CHECK-NEXT: ubfiz w3, w5, #8, #1 |
| ; CHECK-NEXT: umov w17, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: bfi w6, w13, #2, #1 |
| ; CHECK-NEXT: ubfiz w13, w4, #9, #1 |
| ; CHECK-NEXT: umov w18, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: ubfiz w5, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w20, w21, #19, #1 |
| ; CHECK-NEXT: orr w12, w12, w3 |
| ; CHECK-NEXT: ubfiz w1, w1, #10, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: ubfiz w4, w22, #20, #1 |
| ; CHECK-NEXT: orr w12, w12, w13 |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: orr w3, w5, w20 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: orr w11, w12, w1 |
| ; CHECK-NEXT: ubfiz w12, w16, #11, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w13, w3, w4 |
| ; CHECK-NEXT: ubfiz w3, w23, #21, #1 |
| ; CHECK-NEXT: ubfiz w16, w17, #12, #1 |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: ubfiz w17, w24, #22, #1 |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: ubfiz w12, w18, #13, #1 |
| ; CHECK-NEXT: fmov w28, s21 |
| ; CHECK-NEXT: orr w13, w13, w3 |
| ; CHECK-NEXT: ubfiz w18, w25, #23, #1 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: orr w10, w11, w16 |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w29, s22 |
| ; CHECK-NEXT: orr w11, w13, w17 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w26, #24, #1 |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: orr w11, w11, w18 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: ubfiz w9, w14, #14, #1 |
| ; CHECK-NEXT: ubfiz w13, w27, #25, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: ubfiz w14, w28, #26, #1 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w30, s23 |
| ; CHECK-NEXT: orr w9, w10, w9 |
| ; CHECK-NEXT: orr w10, w11, w13 |
| ; CHECK-NEXT: ubfiz w11, w29, #27, #1 |
| ; CHECK-NEXT: umov w2, v0.b[6] |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: fmov w8, s24 |
| ; CHECK-NEXT: ubfiz w12, w15, #15, #1 |
| ; CHECK-NEXT: orr w10, w10, w14 |
| ; CHECK-NEXT: ubfiz w14, w30, #28, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: fmov w11, s2 |
| ; CHECK-NEXT: orr w9, w9, w12 |
| ; CHECK-NEXT: ubfiz w12, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w13, w19, #17, #1 |
| ; CHECK-NEXT: ubfiz w8, w8, #29, #1 |
| ; CHECK-NEXT: bfi w6, w2, #6, #1 |
| ; CHECK-NEXT: orr w10, w10, w14 |
| ; CHECK-NEXT: orr w9, w9, w12 |
| ; CHECK-NEXT: ubfiz w11, w11, #30, #1 |
| ; CHECK-NEXT: orr w8, w10, w8 |
| ; CHECK-NEXT: orr w9, w9, w13 |
| ; CHECK-NEXT: orr w9, w6, w9 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB41_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #2 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB41_3 |
| ; CHECK-NEXT: b .LBB41_4 |
| ; CHECK-NEXT: .LBB41_2: |
| ; CHECK-NEXT: adrp x9, .LCPI41_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI41_0 |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB41_4 |
| ; CHECK-NEXT: .LBB41_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: .LBB41_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB41_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB41_37 |
| ; CHECK-NEXT: .LBB41_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB41_38 |
| ; CHECK-NEXT: .LBB41_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB41_39 |
| ; CHECK-NEXT: .LBB41_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB41_40 |
| ; CHECK-NEXT: .LBB41_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB41_41 |
| ; CHECK-NEXT: .LBB41_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB41_42 |
| ; CHECK-NEXT: .LBB41_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB41_43 |
| ; CHECK-NEXT: .LBB41_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB41_44 |
| ; CHECK-NEXT: .LBB41_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB41_45 |
| ; CHECK-NEXT: .LBB41_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB41_46 |
| ; CHECK-NEXT: .LBB41_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB41_47 |
| ; CHECK-NEXT: .LBB41_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB41_48 |
| ; CHECK-NEXT: .LBB41_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB41_49 |
| ; CHECK-NEXT: .LBB41_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB41_50 |
| ; CHECK-NEXT: .LBB41_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB41_51 |
| ; CHECK-NEXT: .LBB41_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB41_52 |
| ; CHECK-NEXT: .LBB41_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB41_53 |
| ; CHECK-NEXT: .LBB41_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB41_54 |
| ; CHECK-NEXT: .LBB41_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB41_55 |
| ; CHECK-NEXT: .LBB41_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB41_56 |
| ; CHECK-NEXT: .LBB41_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB41_57 |
| ; CHECK-NEXT: .LBB41_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB41_58 |
| ; CHECK-NEXT: .LBB41_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB41_59 |
| ; CHECK-NEXT: .LBB41_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB41_60 |
| ; CHECK-NEXT: .LBB41_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB41_61 |
| ; CHECK-NEXT: .LBB41_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB41_62 |
| ; CHECK-NEXT: .LBB41_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB41_63 |
| ; CHECK-NEXT: .LBB41_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB41_64 |
| ; CHECK-NEXT: .LBB41_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB41_35 |
| ; CHECK-NEXT: .LBB41_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w8 |
| ; CHECK-NEXT: ldrh w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w8 |
| ; CHECK-NEXT: .LBB41_35: // %else122 |
| ; CHECK-NEXT: sunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x8] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB41_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB41_6 |
| ; CHECK-NEXT: .LBB41_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB41_7 |
| ; CHECK-NEXT: .LBB41_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB41_8 |
| ; CHECK-NEXT: .LBB41_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB41_9 |
| ; CHECK-NEXT: .LBB41_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB41_10 |
| ; CHECK-NEXT: .LBB41_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB41_11 |
| ; CHECK-NEXT: .LBB41_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB41_12 |
| ; CHECK-NEXT: .LBB41_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB41_13 |
| ; CHECK-NEXT: .LBB41_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB41_14 |
| ; CHECK-NEXT: .LBB41_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB41_15 |
| ; CHECK-NEXT: .LBB41_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB41_16 |
| ; CHECK-NEXT: .LBB41_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB41_17 |
| ; CHECK-NEXT: .LBB41_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB41_18 |
| ; CHECK-NEXT: .LBB41_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB41_19 |
| ; CHECK-NEXT: .LBB41_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB41_20 |
| ; CHECK-NEXT: .LBB41_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB41_21 |
| ; CHECK-NEXT: .LBB41_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB41_22 |
| ; CHECK-NEXT: .LBB41_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB41_23 |
| ; CHECK-NEXT: .LBB41_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB41_24 |
| ; CHECK-NEXT: .LBB41_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB41_25 |
| ; CHECK-NEXT: .LBB41_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB41_26 |
| ; CHECK-NEXT: .LBB41_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB41_27 |
| ; CHECK-NEXT: .LBB41_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB41_28 |
| ; CHECK-NEXT: .LBB41_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB41_29 |
| ; CHECK-NEXT: .LBB41_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB41_30 |
| ; CHECK-NEXT: .LBB41_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB41_31 |
| ; CHECK-NEXT: .LBB41_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB41_32 |
| ; CHECK-NEXT: .LBB41_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB41_33 |
| ; CHECK-NEXT: .LBB41_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB41_34 |
| ; CHECK-NEXT: b .LBB41_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v32i16i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sh { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; IR under test: lanes where %b is zero are the active lanes of the expandload; |
| ; the sext to i64 is applied to the loaded vector before the store. |
| %b = load <32 x i16>, ptr %bp |
| %mask = icmp eq <32 x i16> %b, zeroinitializer |
| %load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison) |
| %ext = sext <32 x i16> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_v32i32i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_sext_v32i32i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.s, vl32 |
| ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[1] |
| ; CHECK-NEXT: umov w3, v0.b[7] |
| ; CHECK-NEXT: umov w4, v0.b[8] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w5, v0.b[9] |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: umov w18, v0.b[10] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w1, v0.b[11] |
| ; CHECK-NEXT: bfi w6, w13, #1, #1 |
| ; CHECK-NEXT: ubfiz w13, w3, #7, #1 |
| ; CHECK-NEXT: ubfiz w3, w4, #8, #1 |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: ubfiz w4, w5, #9, #1 |
| ; CHECK-NEXT: ubfiz w5, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w20, w21, #19, #1 |
| ; CHECK-NEXT: umov w16, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: bfi w6, w12, #2, #1 |
| ; CHECK-NEXT: orr w12, w13, w3 |
| ; CHECK-NEXT: ubfiz w13, w22, #20, #1 |
| ; CHECK-NEXT: umov w17, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: orr w3, w5, w20 |
| ; CHECK-NEXT: ubfiz w18, w18, #10, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: orr w12, w12, w4 |
| ; CHECK-NEXT: orr w13, w3, w13 |
| ; CHECK-NEXT: ubfiz w3, w23, #21, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: ubfiz w1, w1, #11, #1 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: orr w11, w12, w18 |
| ; CHECK-NEXT: ubfiz w12, w24, #22, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w13, w13, w3 |
| ; CHECK-NEXT: ubfiz w16, w16, #12, #1 |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: orr w11, w11, w1 |
| ; CHECK-NEXT: orr w12, w13, w12 |
| ; CHECK-NEXT: ubfiz w13, w17, #13, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: fmov w28, s21 |
| ; CHECK-NEXT: ubfiz w17, w25, #23, #1 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: orr w10, w11, w16 |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w29, s22 |
| ; CHECK-NEXT: ubfiz w11, w26, #24, #1 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: ubfiz w13, w14, #14, #1 |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: orr w12, w12, w17 |
| ; CHECK-NEXT: ubfiz w14, w27, #25, #1 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: orr w11, w12, w11 |
| ; CHECK-NEXT: ubfiz w9, w28, #26, #1 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: fmov w13, s24 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w30, s23 |
| ; CHECK-NEXT: orr w11, w11, w14 |
| ; CHECK-NEXT: bfi w6, w8, #6, #1 |
| ; CHECK-NEXT: ubfiz w8, w29, #27, #1 |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: orr w9, w11, w9 |
| ; CHECK-NEXT: ubfiz w12, w15, #15, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: ubfiz w9, w13, #29, #1 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: ubfiz w11, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w14, w30, #28, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w19, #17, #1 |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: orr w8, w8, w14 |
| ; CHECK-NEXT: ubfiz w11, w13, #30, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: orr w8, w8, w9 |
| ; CHECK-NEXT: orr w9, w6, w10 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB42_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #4 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB42_3 |
| ; CHECK-NEXT: b .LBB42_4 |
| ; CHECK-NEXT: .LBB42_2: |
| ; CHECK-NEXT: adrp x9, .LCPI42_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI42_0 |
| ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB42_4 |
| ; CHECK-NEXT: .LBB42_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: .LBB42_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB42_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB42_37 |
| ; CHECK-NEXT: .LBB42_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB42_38 |
| ; CHECK-NEXT: .LBB42_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB42_39 |
| ; CHECK-NEXT: .LBB42_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB42_40 |
| ; CHECK-NEXT: .LBB42_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB42_41 |
| ; CHECK-NEXT: .LBB42_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB42_42 |
| ; CHECK-NEXT: .LBB42_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB42_43 |
| ; CHECK-NEXT: .LBB42_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB42_44 |
| ; CHECK-NEXT: .LBB42_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB42_45 |
| ; CHECK-NEXT: .LBB42_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB42_46 |
| ; CHECK-NEXT: .LBB42_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB42_47 |
| ; CHECK-NEXT: .LBB42_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB42_48 |
| ; CHECK-NEXT: .LBB42_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB42_49 |
| ; CHECK-NEXT: .LBB42_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB42_50 |
| ; CHECK-NEXT: .LBB42_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB42_51 |
| ; CHECK-NEXT: .LBB42_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB42_52 |
| ; CHECK-NEXT: .LBB42_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB42_53 |
| ; CHECK-NEXT: .LBB42_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB42_54 |
| ; CHECK-NEXT: .LBB42_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB42_55 |
| ; CHECK-NEXT: .LBB42_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB42_56 |
| ; CHECK-NEXT: .LBB42_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB42_57 |
| ; CHECK-NEXT: .LBB42_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB42_58 |
| ; CHECK-NEXT: .LBB42_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB42_59 |
| ; CHECK-NEXT: .LBB42_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB42_60 |
| ; CHECK-NEXT: .LBB42_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB42_61 |
| ; CHECK-NEXT: .LBB42_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB42_62 |
| ; CHECK-NEXT: .LBB42_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB42_63 |
| ; CHECK-NEXT: .LBB42_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB42_64 |
| ; CHECK-NEXT: .LBB42_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB42_35 |
| ; CHECK-NEXT: .LBB42_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w8 |
| ; CHECK-NEXT: ldr w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w8 |
| ; CHECK-NEXT: .LBB42_35: // %else122 |
| ; CHECK-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB42_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB42_6 |
| ; CHECK-NEXT: .LBB42_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB42_7 |
| ; CHECK-NEXT: .LBB42_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB42_8 |
| ; CHECK-NEXT: .LBB42_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB42_9 |
| ; CHECK-NEXT: .LBB42_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB42_10 |
| ; CHECK-NEXT: .LBB42_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB42_11 |
| ; CHECK-NEXT: .LBB42_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB42_12 |
| ; CHECK-NEXT: .LBB42_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB42_13 |
| ; CHECK-NEXT: .LBB42_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB42_14 |
| ; CHECK-NEXT: .LBB42_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB42_15 |
| ; CHECK-NEXT: .LBB42_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB42_16 |
| ; CHECK-NEXT: .LBB42_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB42_17 |
| ; CHECK-NEXT: .LBB42_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB42_18 |
| ; CHECK-NEXT: .LBB42_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB42_19 |
| ; CHECK-NEXT: .LBB42_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB42_20 |
| ; CHECK-NEXT: .LBB42_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB42_21 |
| ; CHECK-NEXT: .LBB42_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB42_22 |
| ; CHECK-NEXT: .LBB42_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB42_23 |
| ; CHECK-NEXT: .LBB42_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB42_24 |
| ; CHECK-NEXT: .LBB42_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB42_25 |
| ; CHECK-NEXT: .LBB42_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB42_26 |
| ; CHECK-NEXT: .LBB42_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB42_27 |
| ; CHECK-NEXT: .LBB42_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB42_28 |
| ; CHECK-NEXT: .LBB42_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB42_29 |
| ; CHECK-NEXT: .LBB42_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB42_30 |
| ; CHECK-NEXT: .LBB42_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB42_31 |
| ; CHECK-NEXT: .LBB42_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB42_32 |
| ; CHECK-NEXT: .LBB42_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB42_33 |
| ; CHECK-NEXT: .LBB42_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB42_34 |
| ; CHECK-NEXT: b .LBB42_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_v32i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1sw { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i32>, ptr %bp |
| %mask = icmp eq <32 x i32> %b, zeroinitializer |
| %load = call <32 x i32> @llvm.masked.expandload.v32i32(ptr %ap, <32 x i1> %mask, <32 x i32> poison) |
| %ext = sext <32 x i32> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v128i8i16(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v128i8i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: ptrue p1.b, vl128 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w9, v1.b[1] |
| ; CHECK-NEXT: fmov w8, s1 |
| ; CHECK-NEXT: mov z0.b, z1.b[18] |
| ; CHECK-NEXT: umov w10, v1.b[7] |
| ; CHECK-NEXT: umov w11, v1.b[8] |
| ; CHECK-NEXT: mov z2.b, z1.b[19] |
| ; CHECK-NEXT: umov w12, v1.b[2] |
| ; CHECK-NEXT: umov w14, v1.b[9] |
| ; CHECK-NEXT: umov w13, v1.b[3] |
| ; CHECK-NEXT: and x8, x8, #0x1 |
| ; CHECK-NEXT: fmov w16, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[20] |
| ; CHECK-NEXT: bfi x8, x9, #1, #1 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: umov w15, v1.b[10] |
| ; CHECK-NEXT: ubfiz x10, x10, #7, #1 |
| ; CHECK-NEXT: ubfiz x11, x11, #8, #1 |
| ; CHECK-NEXT: mov z2.b, z1.b[21] |
| ; CHECK-NEXT: bfi x8, x12, #2, #1 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: ubfiz x16, x16, #18, #1 |
| ; CHECK-NEXT: ubfiz x9, x9, #19, #1 |
| ; CHECK-NEXT: ubfiz x14, x14, #9, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: umov w11, v1.b[11] |
| ; CHECK-NEXT: mov z0.b, z1.b[22] |
| ; CHECK-NEXT: ubfiz x15, x15, #10, #1 |
| ; CHECK-NEXT: ubfiz x12, x12, #20, #1 |
| ; CHECK-NEXT: orr x9, x16, x9 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: fmov w14, s2 |
| ; CHECK-NEXT: bfi x8, x13, #3, #1 |
| ; CHECK-NEXT: orr x10, x10, x15 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: umov w12, v1.b[12] |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: ubfiz x11, x11, #11, #1 |
| ; CHECK-NEXT: umov w15, v1.b[13] |
| ; CHECK-NEXT: mov z0.b, z1.b[16] |
| ; CHECK-NEXT: ubfiz x14, x14, #21, #1 |
| ; CHECK-NEXT: mov z2.b, z1.b[17] |
| ; CHECK-NEXT: umov w16, v1.b[4] |
| ; CHECK-NEXT: ubfiz x13, x13, #22, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: umov w11, v1.b[14] |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: ubfiz x12, x12, #12, #1 |
| ; CHECK-NEXT: umov w14, v1.b[5] |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: umov w13, v1.b[15] |
| ; CHECK-NEXT: ubfiz x15, x15, #13, #1 |
| ; CHECK-NEXT: orr x10, x10, x12 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[23] |
| ; CHECK-NEXT: ubfiz x11, x11, #14, #1 |
| ; CHECK-NEXT: orr x10, x10, x15 |
| ; CHECK-NEXT: fmov w15, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[24] |
| ; CHECK-NEXT: bfi x8, x16, #4, #1 |
| ; CHECK-NEXT: umov w16, v1.b[6] |
| ; CHECK-NEXT: ubfiz x13, x13, #15, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: fmov w11, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[25] |
| ; CHECK-NEXT: ubfiz x12, x12, #16, #1 |
| ; CHECK-NEXT: bfi x8, x14, #5, #1 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[26] |
| ; CHECK-NEXT: ubfiz x11, x11, #23, #1 |
| ; CHECK-NEXT: orr x10, x10, x12 |
| ; CHECK-NEXT: ubfiz x14, x15, #17, #1 |
| ; CHECK-NEXT: fmov w12, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[27] |
| ; CHECK-NEXT: bfi x8, x16, #6, #1 |
| ; CHECK-NEXT: ubfiz x13, x13, #24, #1 |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: fmov w11, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[28] |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x12, x12, #25, #1 |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[29] |
| ; CHECK-NEXT: ubfiz x11, x11, #26, #1 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: fmov w12, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[30] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x11, x13, #27, #1 |
| ; CHECK-NEXT: fmov w13, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[31] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x12, x12, #28, #1 |
| ; CHECK-NEXT: ubfiz x11, x13, #29, #1 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: mov z2.b, z1.b[32] |
| ; CHECK-NEXT: fmov w10, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[33] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x12, x13, #30, #1 |
| ; CHECK-NEXT: lsl w10, w10, #31 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: orr x8, x8, x9 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[34] |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s0 |
| ; CHECK-NEXT: mov z0.b, z1.b[63] |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z1.b[62] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB43_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB43_3 |
| ; CHECK-NEXT: b .LBB43_4 |
| ; CHECK-NEXT: .LBB43_2: |
| ; CHECK-NEXT: adrp x9, .LCPI43_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI43_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB43_4 |
| ; CHECK-NEXT: .LBB43_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB43_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB43_181 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB43_182 |
| ; CHECK-NEXT: .LBB43_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB43_183 |
| ; CHECK-NEXT: .LBB43_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB43_184 |
| ; CHECK-NEXT: .LBB43_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB43_185 |
| ; CHECK-NEXT: .LBB43_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB43_186 |
| ; CHECK-NEXT: .LBB43_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB43_187 |
| ; CHECK-NEXT: .LBB43_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB43_188 |
| ; CHECK-NEXT: .LBB43_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB43_189 |
| ; CHECK-NEXT: .LBB43_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB43_190 |
| ; CHECK-NEXT: .LBB43_14: // %else42 |
| ; CHECK-NEXT: tbz w8, #12, .LBB43_16 |
| ; CHECK-NEXT: .LBB43_15: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB43_16: // %else46 |
| ; CHECK-NEXT: mov w12, #71 // =0x47 |
| ; CHECK-NEXT: mov w14, #72 // =0x48 |
| ; CHECK-NEXT: mov w9, #83 // =0x53 |
| ; CHECK-NEXT: mov w10, #84 // =0x54 |
| ; CHECK-NEXT: tbz w8, #13, .LBB43_18 |
| ; CHECK-NEXT: // %bb.17: // %cond.load49 |
| ; CHECK-NEXT: mov w11, #13 // =0xd |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w11 |
| ; CHECK-NEXT: .LBB43_18: // %else50 |
| ; CHECK-NEXT: mov w11, #73 // =0x49 |
| ; CHECK-NEXT: mov w13, #85 // =0x55 |
| ; CHECK-NEXT: tbz w8, #14, .LBB43_20 |
| ; CHECK-NEXT: // %bb.19: // %cond.load53 |
| ; CHECK-NEXT: mov w15, #14 // =0xe |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w15 |
| ; CHECK-NEXT: ldrb w15, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w15 |
| ; CHECK-NEXT: .LBB43_20: // %else54 |
| ; CHECK-NEXT: whilels p3.b, xzr, x12 |
| ; CHECK-NEXT: whilels p4.b, xzr, x14 |
| ; CHECK-NEXT: mov w14, #86 // =0x56 |
| ; CHECK-NEXT: whilels p1.b, xzr, x9 |
| ; CHECK-NEXT: mov w9, #74 // =0x4a |
| ; CHECK-NEXT: whilels p2.b, xzr, x10 |
| ; CHECK-NEXT: tbz w8, #15, .LBB43_22 |
| ; CHECK-NEXT: // %bb.21: // %cond.load57 |
| ; CHECK-NEXT: mov w10, #15 // =0xf |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p5/m, w10 |
| ; CHECK-NEXT: .LBB43_22: // %else58 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: mov w1, #75 // =0x4b |
| ; CHECK-NEXT: mov w17, #87 // =0x57 |
| ; CHECK-NEXT: lastb w12, p4, z1.b |
| ; CHECK-NEXT: lastb w15, p1, z1.b |
| ; CHECK-NEXT: lastb w16, p2, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x11 |
| ; CHECK-NEXT: whilels p1.b, xzr, x13 |
| ; CHECK-NEXT: tbz w8, #16, .LBB43_24 |
| ; CHECK-NEXT: // %bb.23: // %cond.load61 |
| ; CHECK-NEXT: mov w11, #16 // =0x10 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w11 |
| ; CHECK-NEXT: .LBB43_24: // %else62 |
| ; CHECK-NEXT: lastb w11, p2, z1.b |
| ; CHECK-NEXT: mov w3, #76 // =0x4c |
| ; CHECK-NEXT: mov w18, #88 // =0x58 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x9 |
| ; CHECK-NEXT: whilels p1.b, xzr, x14 |
| ; CHECK-NEXT: tbz w8, #17, .LBB43_26 |
| ; CHECK-NEXT: // %bb.25: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w9 |
| ; CHECK-NEXT: .LBB43_26: // %else66 |
| ; CHECK-NEXT: lastb w9, p2, z1.b |
| ; CHECK-NEXT: ubfiz x5, x10, #7, #1 |
| ; CHECK-NEXT: ubfiz x7, x12, #8, #1 |
| ; CHECK-NEXT: ubfiz x4, x15, #19, #1 |
| ; CHECK-NEXT: ubfiz x6, x16, #20, #1 |
| ; CHECK-NEXT: mov w15, #89 // =0x59 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x1 |
| ; CHECK-NEXT: mov w1, #77 // =0x4d |
| ; CHECK-NEXT: whilels p1.b, xzr, x17 |
| ; CHECK-NEXT: mov w17, #64 // =0x40 |
| ; CHECK-NEXT: tbz w8, #18, .LBB43_28 |
| ; CHECK-NEXT: // %bb.27: // %cond.load69 |
| ; CHECK-NEXT: mov w10, #18 // =0x12 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w10 |
| ; CHECK-NEXT: .LBB43_28: // %else70 |
| ; CHECK-NEXT: sub sp, sp, #64 |
| ; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: lastb w10, p2, z1.b |
| ; CHECK-NEXT: orr x7, x5, x7 |
| ; CHECK-NEXT: ubfiz x5, x13, #21, #1 |
| ; CHECK-NEXT: mov w16, #65 // =0x41 |
| ; CHECK-NEXT: orr x19, x4, x6 |
| ; CHECK-NEXT: mov w4, #90 // =0x5a |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: whilels p2.b, xzr, x3 |
| ; CHECK-NEXT: ubfiz x3, x11, #9, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x18 |
| ; CHECK-NEXT: mov w18, #78 // =0x4e |
| ; CHECK-NEXT: tbz w8, #19, .LBB43_30 |
| ; CHECK-NEXT: // %bb.29: // %cond.load73 |
| ; CHECK-NEXT: mov w11, #19 // =0x13 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p3/m, w11 |
| ; CHECK-NEXT: .LBB43_30: // %else74 |
| ; CHECK-NEXT: lastb w11, p2, z1.b |
| ; CHECK-NEXT: ubfiz x21, x9, #10, #1 |
| ; CHECK-NEXT: ubfiz x6, x14, #22, #1 |
| ; CHECK-NEXT: orr x7, x7, x3 |
| ; CHECK-NEXT: mov w3, #79 // =0x4f |
| ; CHECK-NEXT: orr x20, x19, x5 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: mov w5, #91 // =0x5b |
| ; CHECK-NEXT: whilels p3.b, xzr, x17 |
| ; CHECK-NEXT: mov w17, #66 // =0x42 |
| ; CHECK-NEXT: whilels p2.b, xzr, x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x15 |
| ; CHECK-NEXT: tbz w8, #20, .LBB43_32 |
| ; CHECK-NEXT: // %bb.31: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w9 |
| ; CHECK-NEXT: .LBB43_32: // %else78 |
| ; CHECK-NEXT: lastb w9, p3, z1.b |
| ; CHECK-NEXT: orr x19, x7, x21 |
| ; CHECK-NEXT: ubfiz x21, x10, #11, #1 |
| ; CHECK-NEXT: ubfiz x7, x12, #23, #1 |
| ; CHECK-NEXT: mov w1, #67 // =0x43 |
| ; CHECK-NEXT: orr x22, x20, x6 |
| ; CHECK-NEXT: lastb w14, p2, z1.b |
| ; CHECK-NEXT: mov w6, #92 // =0x5c |
| ; CHECK-NEXT: lastb w15, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x16 |
| ; CHECK-NEXT: whilels p2.b, xzr, x18 |
| ; CHECK-NEXT: whilels p1.b, xzr, x4 |
| ; CHECK-NEXT: mov w4, #80 // =0x50 |
| ; CHECK-NEXT: tbz w8, #21, .LBB43_34 |
| ; CHECK-NEXT: // %bb.33: // %cond.load81 |
| ; CHECK-NEXT: mov w10, #21 // =0x15 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w10 |
| ; CHECK-NEXT: .LBB43_34: // %else82 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: orr x20, x19, x21 |
| ; CHECK-NEXT: ubfiz x21, x11, #12, #1 |
| ; CHECK-NEXT: ubfiz x19, x13, #24, #1 |
| ; CHECK-NEXT: mov w18, #68 // =0x44 |
| ; CHECK-NEXT: orr x23, x22, x7 |
| ; CHECK-NEXT: lastb w12, p2, z1.b |
| ; CHECK-NEXT: mov w7, #93 // =0x5d |
| ; CHECK-NEXT: lastb w16, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x17 |
| ; CHECK-NEXT: whilels p2.b, xzr, x3 |
| ; CHECK-NEXT: whilels p1.b, xzr, x5 |
| ; CHECK-NEXT: mov w5, #81 // =0x51 |
| ; CHECK-NEXT: tbz w8, #22, .LBB43_36 |
| ; CHECK-NEXT: // %bb.35: // %cond.load85 |
| ; CHECK-NEXT: mov w11, #22 // =0x16 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w11 |
| ; CHECK-NEXT: .LBB43_36: // %else86 |
| ; CHECK-NEXT: lastb w11, p3, z1.b |
| ; CHECK-NEXT: orr x20, x20, x21 |
| ; CHECK-NEXT: ubfiz x21, x14, #13, #1 |
| ; CHECK-NEXT: ubfiz x22, x15, #25, #1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: mov w3, #69 // =0x45 |
| ; CHECK-NEXT: lastb w13, p2, z1.b |
| ; CHECK-NEXT: orr x24, x23, x19 |
| ; CHECK-NEXT: mov w19, #94 // =0x5e |
| ; CHECK-NEXT: lastb w17, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x1 |
| ; CHECK-NEXT: whilels p2.b, xzr, x4 |
| ; CHECK-NEXT: mov w4, #82 // =0x52 |
| ; CHECK-NEXT: whilels p1.b, xzr, x6 |
| ; CHECK-NEXT: mov w6, #95 // =0x5f |
| ; CHECK-NEXT: tbz w8, #23, .LBB43_38 |
| ; CHECK-NEXT: // %bb.37: // %cond.load89 |
| ; CHECK-NEXT: mov w14, #23 // =0x17 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w14 |
| ; CHECK-NEXT: .LBB43_38: // %else90 |
| ; CHECK-NEXT: lastb w14, p3, z1.b |
| ; CHECK-NEXT: bfi x9, x10, #1, #1 |
| ; CHECK-NEXT: ubfiz x23, x16, #26, #1 |
| ; CHECK-NEXT: lastb w15, p2, z1.b |
| ; CHECK-NEXT: lastb w1, p1, z1.b |
| ; CHECK-NEXT: whilels p3.b, xzr, x18 |
| ; CHECK-NEXT: whilels p2.b, xzr, x5 |
| ; CHECK-NEXT: ubfiz x5, x12, #14, #1 |
| ; CHECK-NEXT: mov w12, #70 // =0x46 |
| ; CHECK-NEXT: whilels p1.b, xzr, x7 |
| ; CHECK-NEXT: orr x7, x20, x21 |
| ; CHECK-NEXT: orr x20, x24, x22 |
| ; CHECK-NEXT: tbz w8, #24, .LBB43_40 |
| ; CHECK-NEXT: // %bb.39: // %cond.load93 |
| ; CHECK-NEXT: mov w10, #24 // =0x18 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p4/m, w10 |
| ; CHECK-NEXT: .LBB43_40: // %else94 |
| ; CHECK-NEXT: lastb w10, p3, z1.b |
| ; CHECK-NEXT: bfi x9, x11, #2, #1 |
| ; CHECK-NEXT: orr x5, x7, x5 |
| ; CHECK-NEXT: lastb w16, p2, z1.b |
| ; CHECK-NEXT: lastb w18, p1, z1.b |
| ; CHECK-NEXT: whilels p4.b, xzr, x3 |
| ; CHECK-NEXT: ubfiz x3, x13, #15, #1 |
| ; CHECK-NEXT: whilels p2.b, xzr, x4 |
| ; CHECK-NEXT: ubfiz x4, x17, #27, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x19 |
| ; CHECK-NEXT: whilels p3.b, xzr, x6 |
| ; CHECK-NEXT: orr x6, x20, x23 |
| ; CHECK-NEXT: tbz w8, #25, .LBB43_42 |
| ; CHECK-NEXT: // %bb.41: // %cond.load97 |
| ; CHECK-NEXT: mov w11, #25 // =0x19 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p5/m, w11 |
| ; CHECK-NEXT: .LBB43_42: // %else98 |
| ; CHECK-NEXT: lastb w11, p4, z1.b |
| ; CHECK-NEXT: bfi x9, x14, #3, #1 |
| ; CHECK-NEXT: ubfiz x15, x15, #16, #1 |
| ; CHECK-NEXT: ubfiz x1, x1, #28, #1 |
| ; CHECK-NEXT: orr x3, x5, x3 |
| ; CHECK-NEXT: orr x4, x6, x4 |
| ; CHECK-NEXT: lastb w13, p2, z1.b |
| ; CHECK-NEXT: mov w14, #96 // =0x60 |
| ; CHECK-NEXT: lastb w17, p1, z1.b |
| ; CHECK-NEXT: lastb w7, p3, z1.b |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: tbz w8, #26, .LBB43_44 |
| ; CHECK-NEXT: // %bb.43: // %cond.load101 |
| ; CHECK-NEXT: mov w12, #26 // =0x1a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w12 |
| ; CHECK-NEXT: .LBB43_44: // %else102 |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: bfi x9, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x16, x16, #17, #1 |
| ; CHECK-NEXT: ubfiz x18, x18, #29, #1 |
| ; CHECK-NEXT: lsl w10, w7, #31 |
| ; CHECK-NEXT: orr x3, x3, x15 |
| ; CHECK-NEXT: orr x1, x4, x1 |
| ; CHECK-NEXT: mov w15, #97 // =0x61 |
| ; CHECK-NEXT: tbz w8, #27, .LBB43_46 |
| ; CHECK-NEXT: // %bb.45: // %cond.load105 |
| ; CHECK-NEXT: mov w4, #27 // =0x1b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w4 |
| ; CHECK-NEXT: ldrb w4, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w4 |
| ; CHECK-NEXT: .LBB43_46: // %else106 |
| ; CHECK-NEXT: bfi x9, x11, #5, #1 |
| ; CHECK-NEXT: ubfiz x13, x13, #18, #1 |
| ; CHECK-NEXT: ubfiz x17, x17, #30, #1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x14 |
| ; CHECK-NEXT: orr x16, x3, x16 |
| ; CHECK-NEXT: orr x18, x1, x18 |
| ; CHECK-NEXT: mov w11, #98 // =0x62 |
| ; CHECK-NEXT: tbz w8, #28, .LBB43_48 |
| ; CHECK-NEXT: // %bb.47: // %cond.load109 |
| ; CHECK-NEXT: mov w14, #28 // =0x1c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_48: // %else110 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: bfi x9, x12, #6, #1 |
| ; CHECK-NEXT: mov w12, #99 // =0x63 |
| ; CHECK-NEXT: whilels p1.b, xzr, x15 |
| ; CHECK-NEXT: orr x15, x16, x13 |
| ; CHECK-NEXT: orr x16, x18, x17 |
| ; CHECK-NEXT: tbz w8, #29, .LBB43_50 |
| ; CHECK-NEXT: // %bb.49: // %cond.load113 |
| ; CHECK-NEXT: mov w13, #29 // =0x1d |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_50: // %else114 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x9, x15 |
| ; CHECK-NEXT: orr x10, x16, x10 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #100 // =0x64 |
| ; CHECK-NEXT: tbz w8, #30, .LBB43_52 |
| ; CHECK-NEXT: // %bb.51: // %cond.load117 |
| ; CHECK-NEXT: mov w14, #30 // =0x1e |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_52: // %else118 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #101 // =0x65 |
| ; CHECK-NEXT: tbz w8, #31, .LBB43_54 |
| ; CHECK-NEXT: // %bb.53: // %cond.load121 |
| ; CHECK-NEXT: mov w13, #31 // =0x1f |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_54: // %else122 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #32 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #102 // =0x66 |
| ; CHECK-NEXT: tbz x8, #32, .LBB43_56 |
| ; CHECK-NEXT: // %bb.55: // %cond.load125 |
| ; CHECK-NEXT: mov w14, #32 // =0x20 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_56: // %else126 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #33 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #103 // =0x67 |
| ; CHECK-NEXT: tbz x8, #33, .LBB43_58 |
| ; CHECK-NEXT: // %bb.57: // %cond.load129 |
| ; CHECK-NEXT: mov w13, #33 // =0x21 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_58: // %else130 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #34 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #104 // =0x68 |
| ; CHECK-NEXT: tbz x8, #34, .LBB43_60 |
| ; CHECK-NEXT: // %bb.59: // %cond.load133 |
| ; CHECK-NEXT: mov w14, #34 // =0x22 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_60: // %else134 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #35 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #105 // =0x69 |
| ; CHECK-NEXT: tbz x8, #35, .LBB43_62 |
| ; CHECK-NEXT: // %bb.61: // %cond.load137 |
| ; CHECK-NEXT: mov w13, #35 // =0x23 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_62: // %else138 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #36 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #106 // =0x6a |
| ; CHECK-NEXT: tbz x8, #36, .LBB43_64 |
| ; CHECK-NEXT: // %bb.63: // %cond.load141 |
| ; CHECK-NEXT: mov w14, #36 // =0x24 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_64: // %else142 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #37 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #107 // =0x6b |
| ; CHECK-NEXT: tbz x8, #37, .LBB43_66 |
| ; CHECK-NEXT: // %bb.65: // %cond.load145 |
| ; CHECK-NEXT: mov w13, #37 // =0x25 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_66: // %else146 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #38 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #108 // =0x6c |
| ; CHECK-NEXT: tbz x8, #38, .LBB43_68 |
| ; CHECK-NEXT: // %bb.67: // %cond.load149 |
| ; CHECK-NEXT: mov w14, #38 // =0x26 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_68: // %else150 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #39 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #109 // =0x6d |
| ; CHECK-NEXT: tbz x8, #39, .LBB43_70 |
| ; CHECK-NEXT: // %bb.69: // %cond.load153 |
| ; CHECK-NEXT: mov w13, #39 // =0x27 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_70: // %else154 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #40 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #110 // =0x6e |
| ; CHECK-NEXT: tbz x8, #40, .LBB43_72 |
| ; CHECK-NEXT: // %bb.71: // %cond.load157 |
| ; CHECK-NEXT: mov w14, #40 // =0x28 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_72: // %else158 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #41 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #111 // =0x6f |
| ; CHECK-NEXT: tbz x8, #41, .LBB43_74 |
| ; CHECK-NEXT: // %bb.73: // %cond.load161 |
| ; CHECK-NEXT: mov w13, #41 // =0x29 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_74: // %else162 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #42 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #112 // =0x70 |
| ; CHECK-NEXT: tbz x8, #42, .LBB43_76 |
| ; CHECK-NEXT: // %bb.75: // %cond.load165 |
| ; CHECK-NEXT: mov w14, #42 // =0x2a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_76: // %else166 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #43 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #113 // =0x71 |
| ; CHECK-NEXT: tbz x8, #43, .LBB43_78 |
| ; CHECK-NEXT: // %bb.77: // %cond.load169 |
| ; CHECK-NEXT: mov w13, #43 // =0x2b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_78: // %else170 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #44 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #114 // =0x72 |
| ; CHECK-NEXT: tbz x8, #44, .LBB43_80 |
| ; CHECK-NEXT: // %bb.79: // %cond.load173 |
| ; CHECK-NEXT: mov w14, #44 // =0x2c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_80: // %else174 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #45 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #115 // =0x73 |
| ; CHECK-NEXT: tbz x8, #45, .LBB43_82 |
| ; CHECK-NEXT: // %bb.81: // %cond.load177 |
| ; CHECK-NEXT: mov w13, #45 // =0x2d |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_82: // %else178 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #46 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #116 // =0x74 |
| ; CHECK-NEXT: tbz x8, #46, .LBB43_84 |
| ; CHECK-NEXT: // %bb.83: // %cond.load181 |
| ; CHECK-NEXT: mov w14, #46 // =0x2e |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_84: // %else182 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #47 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #117 // =0x75 |
| ; CHECK-NEXT: tbz x8, #47, .LBB43_86 |
| ; CHECK-NEXT: // %bb.85: // %cond.load185 |
| ; CHECK-NEXT: mov w13, #47 // =0x2f |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_86: // %else186 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #48 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #118 // =0x76 |
| ; CHECK-NEXT: tbz x8, #48, .LBB43_88 |
| ; CHECK-NEXT: // %bb.87: // %cond.load189 |
| ; CHECK-NEXT: mov w14, #48 // =0x30 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_88: // %else190 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #49 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #119 // =0x77 |
| ; CHECK-NEXT: tbz x8, #49, .LBB43_90 |
| ; CHECK-NEXT: // %bb.89: // %cond.load193 |
| ; CHECK-NEXT: mov w13, #49 // =0x31 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_90: // %else194 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #50 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #120 // =0x78 |
| ; CHECK-NEXT: tbz x8, #50, .LBB43_92 |
| ; CHECK-NEXT: // %bb.91: // %cond.load197 |
| ; CHECK-NEXT: mov w14, #50 // =0x32 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_92: // %else198 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #51 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #121 // =0x79 |
| ; CHECK-NEXT: tbz x8, #51, .LBB43_94 |
| ; CHECK-NEXT: // %bb.93: // %cond.load201 |
| ; CHECK-NEXT: mov w13, #51 // =0x33 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_94: // %else202 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #52 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #122 // =0x7a |
| ; CHECK-NEXT: tbz x8, #52, .LBB43_96 |
| ; CHECK-NEXT: // %bb.95: // %cond.load205 |
| ; CHECK-NEXT: mov w14, #52 // =0x34 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_96: // %else206 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #53 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #123 // =0x7b |
| ; CHECK-NEXT: tbz x8, #53, .LBB43_98 |
| ; CHECK-NEXT: // %bb.97: // %cond.load209 |
| ; CHECK-NEXT: mov w13, #53 // =0x35 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_98: // %else210 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #54 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #124 // =0x7c |
| ; CHECK-NEXT: tbz x8, #54, .LBB43_100 |
| ; CHECK-NEXT: // %bb.99: // %cond.load213 |
| ; CHECK-NEXT: mov w14, #54 // =0x36 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_100: // %else214 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #55 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: mov w12, #125 // =0x7d |
| ; CHECK-NEXT: tbz x8, #55, .LBB43_102 |
| ; CHECK-NEXT: // %bb.101: // %cond.load217 |
| ; CHECK-NEXT: mov w13, #55 // =0x37 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w13 |
| ; CHECK-NEXT: ldrb w13, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w13 |
| ; CHECK-NEXT: .LBB43_102: // %else218 |
| ; CHECK-NEXT: lastb w13, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x9, lsl #56 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: mov w11, #126 // =0x7e |
| ; CHECK-NEXT: tbz x8, #56, .LBB43_104 |
| ; CHECK-NEXT: // %bb.103: // %cond.load221 |
| ; CHECK-NEXT: mov w14, #56 // =0x38 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_104: // %else222 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x15, x15, x10, lsl #57 |
| ; CHECK-NEXT: and w10, w13, #0x1 |
| ; CHECK-NEXT: whilels p1.b, xzr, x12 |
| ; CHECK-NEXT: tbz x8, #57, .LBB43_106 |
| ; CHECK-NEXT: // %bb.105: // %cond.load225 |
| ; CHECK-NEXT: mov w12, #57 // =0x39 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w12 |
| ; CHECK-NEXT: .LBB43_106: // %else226 |
| ; CHECK-NEXT: lastb w12, p1, z1.b |
| ; CHECK-NEXT: orr x13, x15, x9, lsl #58 |
| ; CHECK-NEXT: mov w9, #127 // =0x7f |
| ; CHECK-NEXT: whilels p1.b, xzr, x11 |
| ; CHECK-NEXT: and w11, w14, #0x1 |
| ; CHECK-NEXT: tbz x8, #58, .LBB43_108 |
| ; CHECK-NEXT: // %bb.107: // %cond.load229 |
| ; CHECK-NEXT: mov w14, #58 // =0x3a |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w14 |
| ; CHECK-NEXT: ldrb w14, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w14 |
| ; CHECK-NEXT: .LBB43_108: // %else230 |
| ; CHECK-NEXT: lastb w14, p1, z1.b |
| ; CHECK-NEXT: orr x13, x13, x10, lsl #59 |
| ; CHECK-NEXT: and w10, w12, #0x1 |
| ; CHECK-NEXT: tbz x8, #59, .LBB43_110 |
| ; CHECK-NEXT: // %bb.109: // %cond.load233 |
| ; CHECK-NEXT: mov w12, #59 // =0x3b |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w12 |
| ; CHECK-NEXT: .LBB43_110: // %else234 |
| ; CHECK-NEXT: orr x12, x13, x11, lsl #60 |
| ; CHECK-NEXT: whilels p1.b, xzr, x9 |
| ; CHECK-NEXT: and w9, w14, #0x1 |
| ; CHECK-NEXT: tbz x8, #60, .LBB43_112 |
| ; CHECK-NEXT: // %bb.111: // %cond.load237 |
| ; CHECK-NEXT: mov w11, #60 // =0x3c |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w11 |
| ; CHECK-NEXT: ldrb w11, [x0], #1 |
| ; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p2/m, w11 |
| ; CHECK-NEXT: .LBB43_112: // %else238 |
| ; CHECK-NEXT: lastb w11, p1, z1.b |
| ; CHECK-NEXT: orr x10, x12, x10, lsl #61 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB43_191 |
| ; CHECK-NEXT: // %bb.113: // %else242 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #62 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB43_192 |
| ; CHECK-NEXT: .LBB43_114: // %else246 |
| ; CHECK-NEXT: orr x9, x9, x11, lsl #63 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB43_193 |
| ; CHECK-NEXT: .LBB43_115: // %else250 |
| ; CHECK-NEXT: tbnz w9, #0, .LBB43_194 |
| ; CHECK-NEXT: .LBB43_116: // %else254 |
| ; CHECK-NEXT: tbnz w9, #1, .LBB43_195 |
| ; CHECK-NEXT: .LBB43_117: // %else258 |
| ; CHECK-NEXT: tbnz w9, #2, .LBB43_196 |
| ; CHECK-NEXT: .LBB43_118: // %else262 |
| ; CHECK-NEXT: tbnz w9, #3, .LBB43_197 |
| ; CHECK-NEXT: .LBB43_119: // %else266 |
| ; CHECK-NEXT: tbnz w9, #4, .LBB43_198 |
| ; CHECK-NEXT: .LBB43_120: // %else270 |
| ; CHECK-NEXT: tbnz w9, #5, .LBB43_199 |
| ; CHECK-NEXT: .LBB43_121: // %else274 |
| ; CHECK-NEXT: tbnz w9, #6, .LBB43_200 |
| ; CHECK-NEXT: .LBB43_122: // %else278 |
| ; CHECK-NEXT: tbnz w9, #7, .LBB43_201 |
| ; CHECK-NEXT: .LBB43_123: // %else282 |
| ; CHECK-NEXT: tbnz w9, #8, .LBB43_202 |
| ; CHECK-NEXT: .LBB43_124: // %else286 |
| ; CHECK-NEXT: tbnz w9, #9, .LBB43_203 |
| ; CHECK-NEXT: .LBB43_125: // %else290 |
| ; CHECK-NEXT: tbnz w9, #10, .LBB43_204 |
| ; CHECK-NEXT: .LBB43_126: // %else294 |
| ; CHECK-NEXT: tbnz w9, #11, .LBB43_205 |
| ; CHECK-NEXT: .LBB43_127: // %else298 |
| ; CHECK-NEXT: tbnz w9, #12, .LBB43_206 |
| ; CHECK-NEXT: .LBB43_128: // %else302 |
| ; CHECK-NEXT: tbnz w9, #13, .LBB43_207 |
| ; CHECK-NEXT: .LBB43_129: // %else306 |
| ; CHECK-NEXT: tbnz w9, #14, .LBB43_208 |
| ; CHECK-NEXT: .LBB43_130: // %else310 |
| ; CHECK-NEXT: tbnz w9, #15, .LBB43_209 |
| ; CHECK-NEXT: .LBB43_131: // %else314 |
| ; CHECK-NEXT: tbnz w9, #16, .LBB43_210 |
| ; CHECK-NEXT: .LBB43_132: // %else318 |
| ; CHECK-NEXT: tbnz w9, #17, .LBB43_211 |
| ; CHECK-NEXT: .LBB43_133: // %else322 |
| ; CHECK-NEXT: tbnz w9, #18, .LBB43_212 |
| ; CHECK-NEXT: .LBB43_134: // %else326 |
| ; CHECK-NEXT: tbnz w9, #19, .LBB43_213 |
| ; CHECK-NEXT: .LBB43_135: // %else330 |
| ; CHECK-NEXT: tbnz w9, #20, .LBB43_214 |
| ; CHECK-NEXT: .LBB43_136: // %else334 |
| ; CHECK-NEXT: tbnz w9, #21, .LBB43_215 |
| ; CHECK-NEXT: .LBB43_137: // %else338 |
| ; CHECK-NEXT: tbnz w9, #22, .LBB43_216 |
| ; CHECK-NEXT: .LBB43_138: // %else342 |
| ; CHECK-NEXT: tbnz w9, #23, .LBB43_217 |
| ; CHECK-NEXT: .LBB43_139: // %else346 |
| ; CHECK-NEXT: tbnz w9, #24, .LBB43_218 |
| ; CHECK-NEXT: .LBB43_140: // %else350 |
| ; CHECK-NEXT: tbnz w9, #25, .LBB43_219 |
| ; CHECK-NEXT: .LBB43_141: // %else354 |
| ; CHECK-NEXT: tbnz w9, #26, .LBB43_220 |
| ; CHECK-NEXT: .LBB43_142: // %else358 |
| ; CHECK-NEXT: tbnz w9, #27, .LBB43_221 |
| ; CHECK-NEXT: .LBB43_143: // %else362 |
| ; CHECK-NEXT: tbnz w9, #28, .LBB43_222 |
| ; CHECK-NEXT: .LBB43_144: // %else366 |
| ; CHECK-NEXT: tbnz w9, #29, .LBB43_223 |
| ; CHECK-NEXT: .LBB43_145: // %else370 |
| ; CHECK-NEXT: tbnz w9, #30, .LBB43_224 |
| ; CHECK-NEXT: .LBB43_146: // %else374 |
| ; CHECK-NEXT: tbnz w9, #31, .LBB43_225 |
| ; CHECK-NEXT: .LBB43_147: // %else378 |
| ; CHECK-NEXT: tbnz x9, #32, .LBB43_226 |
| ; CHECK-NEXT: .LBB43_148: // %else382 |
| ; CHECK-NEXT: tbnz x9, #33, .LBB43_227 |
| ; CHECK-NEXT: .LBB43_149: // %else386 |
| ; CHECK-NEXT: tbnz x9, #34, .LBB43_228 |
| ; CHECK-NEXT: .LBB43_150: // %else390 |
| ; CHECK-NEXT: tbnz x9, #35, .LBB43_229 |
| ; CHECK-NEXT: .LBB43_151: // %else394 |
| ; CHECK-NEXT: tbnz x9, #36, .LBB43_230 |
| ; CHECK-NEXT: .LBB43_152: // %else398 |
| ; CHECK-NEXT: tbnz x9, #37, .LBB43_231 |
| ; CHECK-NEXT: .LBB43_153: // %else402 |
| ; CHECK-NEXT: tbnz x9, #38, .LBB43_232 |
| ; CHECK-NEXT: .LBB43_154: // %else406 |
| ; CHECK-NEXT: tbnz x9, #39, .LBB43_233 |
| ; CHECK-NEXT: .LBB43_155: // %else410 |
| ; CHECK-NEXT: tbnz x9, #40, .LBB43_234 |
| ; CHECK-NEXT: .LBB43_156: // %else414 |
| ; CHECK-NEXT: tbnz x9, #41, .LBB43_235 |
| ; CHECK-NEXT: .LBB43_157: // %else418 |
| ; CHECK-NEXT: tbnz x9, #42, .LBB43_236 |
| ; CHECK-NEXT: .LBB43_158: // %else422 |
| ; CHECK-NEXT: tbnz x9, #43, .LBB43_237 |
| ; CHECK-NEXT: .LBB43_159: // %else426 |
| ; CHECK-NEXT: tbnz x9, #44, .LBB43_238 |
| ; CHECK-NEXT: .LBB43_160: // %else430 |
| ; CHECK-NEXT: tbnz x9, #45, .LBB43_239 |
| ; CHECK-NEXT: .LBB43_161: // %else434 |
| ; CHECK-NEXT: tbnz x9, #46, .LBB43_240 |
| ; CHECK-NEXT: .LBB43_162: // %else438 |
| ; CHECK-NEXT: tbnz x9, #47, .LBB43_241 |
| ; CHECK-NEXT: .LBB43_163: // %else442 |
| ; CHECK-NEXT: tbnz x9, #48, .LBB43_242 |
| ; CHECK-NEXT: .LBB43_164: // %else446 |
| ; CHECK-NEXT: tbnz x9, #49, .LBB43_243 |
| ; CHECK-NEXT: .LBB43_165: // %else450 |
| ; CHECK-NEXT: tbnz x9, #50, .LBB43_244 |
| ; CHECK-NEXT: .LBB43_166: // %else454 |
| ; CHECK-NEXT: tbnz x9, #51, .LBB43_245 |
| ; CHECK-NEXT: .LBB43_167: // %else458 |
| ; CHECK-NEXT: tbnz x9, #52, .LBB43_246 |
| ; CHECK-NEXT: .LBB43_168: // %else462 |
| ; CHECK-NEXT: tbnz x9, #53, .LBB43_247 |
| ; CHECK-NEXT: .LBB43_169: // %else466 |
| ; CHECK-NEXT: tbnz x9, #54, .LBB43_248 |
| ; CHECK-NEXT: .LBB43_170: // %else470 |
| ; CHECK-NEXT: tbnz x9, #55, .LBB43_249 |
| ; CHECK-NEXT: .LBB43_171: // %else474 |
| ; CHECK-NEXT: tbnz x9, #56, .LBB43_250 |
| ; CHECK-NEXT: .LBB43_172: // %else478 |
| ; CHECK-NEXT: tbnz x9, #57, .LBB43_251 |
| ; CHECK-NEXT: .LBB43_173: // %else482 |
| ; CHECK-NEXT: tbnz x9, #58, .LBB43_252 |
| ; CHECK-NEXT: .LBB43_174: // %else486 |
| ; CHECK-NEXT: tbnz x9, #59, .LBB43_253 |
| ; CHECK-NEXT: .LBB43_175: // %else490 |
| ; CHECK-NEXT: tbnz x9, #60, .LBB43_254 |
| ; CHECK-NEXT: .LBB43_176: // %else494 |
| ; CHECK-NEXT: tbnz x9, #61, .LBB43_255 |
| ; CHECK-NEXT: .LBB43_177: // %else498 |
| ; CHECK-NEXT: tbnz x9, #62, .LBB43_256 |
| ; CHECK-NEXT: .LBB43_178: // %else502 |
| ; CHECK-NEXT: tbz x9, #63, .LBB43_180 |
| ; CHECK-NEXT: .LBB43_179: // %cond.load505 |
| ; CHECK-NEXT: mov w8, #127 // =0x7f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB43_180: // %else506 |
| ; CHECK-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.h, vl128 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #64 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB43_181: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB43_6 |
| ; CHECK-NEXT: .LBB43_182: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB43_7 |
| ; CHECK-NEXT: .LBB43_183: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB43_8 |
| ; CHECK-NEXT: .LBB43_184: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB43_9 |
| ; CHECK-NEXT: .LBB43_185: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB43_10 |
| ; CHECK-NEXT: .LBB43_186: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB43_11 |
| ; CHECK-NEXT: .LBB43_187: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB43_12 |
| ; CHECK-NEXT: .LBB43_188: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB43_13 |
| ; CHECK-NEXT: .LBB43_189: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB43_14 |
| ; CHECK-NEXT: .LBB43_190: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z2.b, #0, #1 |
| ; CHECK-NEXT: mov z3.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB43_15 |
| ; CHECK-NEXT: b .LBB43_16 |
| ; CHECK-NEXT: .LBB43_191: // %cond.load241 |
| ; CHECK-NEXT: mov w12, #61 // =0x3d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w12 |
| ; CHECK-NEXT: ldrb w12, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w12 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #62 |
| ; CHECK-NEXT: tbz x8, #62, .LBB43_114 |
| ; CHECK-NEXT: .LBB43_192: // %cond.load245 |
| ; CHECK-NEXT: mov w10, #62 // =0x3e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w10 |
| ; CHECK-NEXT: ldrb w10, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w10 |
| ; CHECK-NEXT: orr x9, x9, x11, lsl #63 |
| ; CHECK-NEXT: tbz x8, #63, .LBB43_115 |
| ; CHECK-NEXT: .LBB43_193: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #0, .LBB43_116 |
| ; CHECK-NEXT: .LBB43_194: // %cond.load253 |
| ; CHECK-NEXT: mov w8, #64 // =0x40 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #1, .LBB43_117 |
| ; CHECK-NEXT: .LBB43_195: // %cond.load257 |
| ; CHECK-NEXT: mov w8, #65 // =0x41 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #2, .LBB43_118 |
| ; CHECK-NEXT: .LBB43_196: // %cond.load261 |
| ; CHECK-NEXT: mov w8, #66 // =0x42 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #3, .LBB43_119 |
| ; CHECK-NEXT: .LBB43_197: // %cond.load265 |
| ; CHECK-NEXT: mov w8, #67 // =0x43 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #4, .LBB43_120 |
| ; CHECK-NEXT: .LBB43_198: // %cond.load269 |
| ; CHECK-NEXT: mov w8, #68 // =0x44 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #5, .LBB43_121 |
| ; CHECK-NEXT: .LBB43_199: // %cond.load273 |
| ; CHECK-NEXT: mov w8, #69 // =0x45 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #6, .LBB43_122 |
| ; CHECK-NEXT: .LBB43_200: // %cond.load277 |
| ; CHECK-NEXT: mov w8, #70 // =0x46 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #7, .LBB43_123 |
| ; CHECK-NEXT: .LBB43_201: // %cond.load281 |
| ; CHECK-NEXT: mov w8, #71 // =0x47 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #8, .LBB43_124 |
| ; CHECK-NEXT: .LBB43_202: // %cond.load285 |
| ; CHECK-NEXT: mov w8, #72 // =0x48 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #9, .LBB43_125 |
| ; CHECK-NEXT: .LBB43_203: // %cond.load289 |
| ; CHECK-NEXT: mov w8, #73 // =0x49 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #10, .LBB43_126 |
| ; CHECK-NEXT: .LBB43_204: // %cond.load293 |
| ; CHECK-NEXT: mov w8, #74 // =0x4a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #11, .LBB43_127 |
| ; CHECK-NEXT: .LBB43_205: // %cond.load297 |
| ; CHECK-NEXT: mov w8, #75 // =0x4b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #12, .LBB43_128 |
| ; CHECK-NEXT: .LBB43_206: // %cond.load301 |
| ; CHECK-NEXT: mov w8, #76 // =0x4c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #13, .LBB43_129 |
| ; CHECK-NEXT: .LBB43_207: // %cond.load305 |
| ; CHECK-NEXT: mov w8, #77 // =0x4d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #14, .LBB43_130 |
| ; CHECK-NEXT: .LBB43_208: // %cond.load309 |
| ; CHECK-NEXT: mov w8, #78 // =0x4e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #15, .LBB43_131 |
| ; CHECK-NEXT: .LBB43_209: // %cond.load313 |
| ; CHECK-NEXT: mov w8, #79 // =0x4f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #16, .LBB43_132 |
| ; CHECK-NEXT: .LBB43_210: // %cond.load317 |
| ; CHECK-NEXT: mov w8, #80 // =0x50 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #17, .LBB43_133 |
| ; CHECK-NEXT: .LBB43_211: // %cond.load321 |
| ; CHECK-NEXT: mov w8, #81 // =0x51 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #18, .LBB43_134 |
| ; CHECK-NEXT: .LBB43_212: // %cond.load325 |
| ; CHECK-NEXT: mov w8, #82 // =0x52 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #19, .LBB43_135 |
| ; CHECK-NEXT: .LBB43_213: // %cond.load329 |
| ; CHECK-NEXT: mov w8, #83 // =0x53 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #20, .LBB43_136 |
| ; CHECK-NEXT: .LBB43_214: // %cond.load333 |
| ; CHECK-NEXT: mov w8, #84 // =0x54 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #21, .LBB43_137 |
| ; CHECK-NEXT: .LBB43_215: // %cond.load337 |
| ; CHECK-NEXT: mov w8, #85 // =0x55 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #22, .LBB43_138 |
| ; CHECK-NEXT: .LBB43_216: // %cond.load341 |
| ; CHECK-NEXT: mov w8, #86 // =0x56 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #23, .LBB43_139 |
| ; CHECK-NEXT: .LBB43_217: // %cond.load345 |
| ; CHECK-NEXT: mov w8, #87 // =0x57 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #24, .LBB43_140 |
| ; CHECK-NEXT: .LBB43_218: // %cond.load349 |
| ; CHECK-NEXT: mov w8, #88 // =0x58 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #25, .LBB43_141 |
| ; CHECK-NEXT: .LBB43_219: // %cond.load353 |
| ; CHECK-NEXT: mov w8, #89 // =0x59 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #26, .LBB43_142 |
| ; CHECK-NEXT: .LBB43_220: // %cond.load357 |
| ; CHECK-NEXT: mov w8, #90 // =0x5a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #27, .LBB43_143 |
| ; CHECK-NEXT: .LBB43_221: // %cond.load361 |
| ; CHECK-NEXT: mov w8, #91 // =0x5b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #28, .LBB43_144 |
| ; CHECK-NEXT: .LBB43_222: // %cond.load365 |
| ; CHECK-NEXT: mov w8, #92 // =0x5c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #29, .LBB43_145 |
| ; CHECK-NEXT: .LBB43_223: // %cond.load369 |
| ; CHECK-NEXT: mov w8, #93 // =0x5d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #30, .LBB43_146 |
| ; CHECK-NEXT: .LBB43_224: // %cond.load373 |
| ; CHECK-NEXT: mov w8, #94 // =0x5e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz w9, #31, .LBB43_147 |
| ; CHECK-NEXT: .LBB43_225: // %cond.load377 |
| ; CHECK-NEXT: mov w8, #95 // =0x5f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #32, .LBB43_148 |
| ; CHECK-NEXT: .LBB43_226: // %cond.load381 |
| ; CHECK-NEXT: mov w8, #96 // =0x60 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #33, .LBB43_149 |
| ; CHECK-NEXT: .LBB43_227: // %cond.load385 |
| ; CHECK-NEXT: mov w8, #97 // =0x61 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #34, .LBB43_150 |
| ; CHECK-NEXT: .LBB43_228: // %cond.load389 |
| ; CHECK-NEXT: mov w8, #98 // =0x62 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #35, .LBB43_151 |
| ; CHECK-NEXT: .LBB43_229: // %cond.load393 |
| ; CHECK-NEXT: mov w8, #99 // =0x63 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #36, .LBB43_152 |
| ; CHECK-NEXT: .LBB43_230: // %cond.load397 |
| ; CHECK-NEXT: mov w8, #100 // =0x64 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #37, .LBB43_153 |
| ; CHECK-NEXT: .LBB43_231: // %cond.load401 |
| ; CHECK-NEXT: mov w8, #101 // =0x65 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #38, .LBB43_154 |
| ; CHECK-NEXT: .LBB43_232: // %cond.load405 |
| ; CHECK-NEXT: mov w8, #102 // =0x66 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #39, .LBB43_155 |
| ; CHECK-NEXT: .LBB43_233: // %cond.load409 |
| ; CHECK-NEXT: mov w8, #103 // =0x67 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #40, .LBB43_156 |
| ; CHECK-NEXT: .LBB43_234: // %cond.load413 |
| ; CHECK-NEXT: mov w8, #104 // =0x68 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #41, .LBB43_157 |
| ; CHECK-NEXT: .LBB43_235: // %cond.load417 |
| ; CHECK-NEXT: mov w8, #105 // =0x69 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #42, .LBB43_158 |
| ; CHECK-NEXT: .LBB43_236: // %cond.load421 |
| ; CHECK-NEXT: mov w8, #106 // =0x6a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #43, .LBB43_159 |
| ; CHECK-NEXT: .LBB43_237: // %cond.load425 |
| ; CHECK-NEXT: mov w8, #107 // =0x6b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #44, .LBB43_160 |
| ; CHECK-NEXT: .LBB43_238: // %cond.load429 |
| ; CHECK-NEXT: mov w8, #108 // =0x6c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #45, .LBB43_161 |
| ; CHECK-NEXT: .LBB43_239: // %cond.load433 |
| ; CHECK-NEXT: mov w8, #109 // =0x6d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #46, .LBB43_162 |
| ; CHECK-NEXT: .LBB43_240: // %cond.load437 |
| ; CHECK-NEXT: mov w8, #110 // =0x6e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #47, .LBB43_163 |
| ; CHECK-NEXT: .LBB43_241: // %cond.load441 |
| ; CHECK-NEXT: mov w8, #111 // =0x6f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #48, .LBB43_164 |
| ; CHECK-NEXT: .LBB43_242: // %cond.load445 |
| ; CHECK-NEXT: mov w8, #112 // =0x70 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #49, .LBB43_165 |
| ; CHECK-NEXT: .LBB43_243: // %cond.load449 |
| ; CHECK-NEXT: mov w8, #113 // =0x71 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #50, .LBB43_166 |
| ; CHECK-NEXT: .LBB43_244: // %cond.load453 |
| ; CHECK-NEXT: mov w8, #114 // =0x72 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #51, .LBB43_167 |
| ; CHECK-NEXT: .LBB43_245: // %cond.load457 |
| ; CHECK-NEXT: mov w8, #115 // =0x73 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #52, .LBB43_168 |
| ; CHECK-NEXT: .LBB43_246: // %cond.load461 |
| ; CHECK-NEXT: mov w8, #116 // =0x74 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #53, .LBB43_169 |
| ; CHECK-NEXT: .LBB43_247: // %cond.load465 |
| ; CHECK-NEXT: mov w8, #117 // =0x75 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #54, .LBB43_170 |
| ; CHECK-NEXT: .LBB43_248: // %cond.load469 |
| ; CHECK-NEXT: mov w8, #118 // =0x76 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #55, .LBB43_171 |
| ; CHECK-NEXT: .LBB43_249: // %cond.load473 |
| ; CHECK-NEXT: mov w8, #119 // =0x77 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #56, .LBB43_172 |
| ; CHECK-NEXT: .LBB43_250: // %cond.load477 |
| ; CHECK-NEXT: mov w8, #120 // =0x78 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #57, .LBB43_173 |
| ; CHECK-NEXT: .LBB43_251: // %cond.load481 |
| ; CHECK-NEXT: mov w8, #121 // =0x79 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #58, .LBB43_174 |
| ; CHECK-NEXT: .LBB43_252: // %cond.load485 |
| ; CHECK-NEXT: mov w8, #122 // =0x7a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #59, .LBB43_175 |
| ; CHECK-NEXT: .LBB43_253: // %cond.load489 |
| ; CHECK-NEXT: mov w8, #123 // =0x7b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #60, .LBB43_176 |
| ; CHECK-NEXT: .LBB43_254: // %cond.load493 |
| ; CHECK-NEXT: mov w8, #124 // =0x7c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #61, .LBB43_177 |
| ; CHECK-NEXT: .LBB43_255: // %cond.load497 |
| ; CHECK-NEXT: mov w8, #125 // =0x7d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbz x9, #62, .LBB43_178 |
| ; CHECK-NEXT: .LBB43_256: // %cond.load501 |
| ; CHECK-NEXT: mov w8, #126 // =0x7e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: tbnz x9, #63, .LBB43_179 |
| ; CHECK-NEXT: b .LBB43_180 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v128i8i16: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.h, vl128 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.h }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h |
| ; CHECK-EXPAND-NEXT: whilelo p2.h, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.h }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h |
| ; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <128 x i8>, ptr %bp |
| %mask = icmp eq <128 x i8> %b, zeroinitializer |
| %load = call <128 x i8> @llvm.masked.expandload.v128i8(ptr %ap, <128 x i1> %mask, <128 x i8> poison) |
| %ext = zext <128 x i8> %load to <128 x i16> |
| store <128 x i16> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v64i8i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v64i8i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.b, vl64 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w11, v0.b[1] |
| ; CHECK-NEXT: fmov w22, s0 |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: umov w13, v0.b[3] |
| ; CHECK-NEXT: umov w14, v0.b[7] |
| ; CHECK-NEXT: umov w1, v0.b[8] |
| ; CHECK-NEXT: umov w16, v0.b[9] |
| ; CHECK-NEXT: mov z3.b, z0.b[18] |
| ; CHECK-NEXT: mov z5.b, z0.b[19] |
| ; CHECK-NEXT: and x22, x22, #0x1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: umov w17, v0.b[10] |
| ; CHECK-NEXT: bfi x22, x11, #1, #1 |
| ; CHECK-NEXT: mov z6.b, z0.b[20] |
| ; CHECK-NEXT: umov w3, v0.b[11] |
| ; CHECK-NEXT: mov z4.b, z0.b[21] |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: mov z7.b, z0.b[22] |
| ; CHECK-NEXT: bfi x22, x12, #2, #1 |
| ; CHECK-NEXT: fmov w19, s3 |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: ubfiz x14, x14, #7, #1 |
| ; CHECK-NEXT: ubfiz x1, x1, #8, #1 |
| ; CHECK-NEXT: umov w4, v0.b[12] |
| ; CHECK-NEXT: bfi x22, x13, #3, #1 |
| ; CHECK-NEXT: mov z16.b, z0.b[23] |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: ubfiz x16, x16, #9, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: umov w5, v0.b[13] |
| ; CHECK-NEXT: mov z17.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s4 |
| ; CHECK-NEXT: orr x14, x14, x1 |
| ; CHECK-NEXT: bfi x22, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x10, x17, #10, #1 |
| ; CHECK-NEXT: mov z18.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s7 |
| ; CHECK-NEXT: ubfiz x13, x19, #18, #1 |
| ; CHECK-NEXT: ubfiz x19, x20, #19, #1 |
| ; CHECK-NEXT: orr x14, x14, x16 |
| ; CHECK-NEXT: ubfiz x16, x3, #11, #1 |
| ; CHECK-NEXT: umov w15, v0.b[14] |
| ; CHECK-NEXT: mov z19.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s16 |
| ; CHECK-NEXT: ubfiz x1, x21, #20, #1 |
| ; CHECK-NEXT: orr x10, x14, x10 |
| ; CHECK-NEXT: bfi x22, x9, #5, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s17 |
| ; CHECK-NEXT: orr x13, x13, x19 |
| ; CHECK-NEXT: ubfiz x9, x4, #12, #1 |
| ; CHECK-NEXT: orr x10, x10, x16 |
| ; CHECK-NEXT: ubfiz x16, x23, #21, #1 |
| ; CHECK-NEXT: umov w18, v0.b[15] |
| ; CHECK-NEXT: mov z1.b, z0.b[16] |
| ; CHECK-NEXT: mov z21.b, z0.b[28] |
| ; CHECK-NEXT: fmov w11, s18 |
| ; CHECK-NEXT: orr x13, x13, x1 |
| ; CHECK-NEXT: ubfiz x14, x5, #13, #1 |
| ; CHECK-NEXT: bfi x22, x8, #6, #1 |
| ; CHECK-NEXT: ubfiz x8, x24, #22, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[17] |
| ; CHECK-NEXT: mov z22.b, z0.b[29] |
| ; CHECK-NEXT: fmov w27, s19 |
| ; CHECK-NEXT: orr x9, x10, x9 |
| ; CHECK-NEXT: orr x10, x13, x16 |
| ; CHECK-NEXT: ubfiz x13, x25, #23, #1 |
| ; CHECK-NEXT: mov z5.b, z0.b[30] |
| ; CHECK-NEXT: fmov w28, s20 |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: orr x8, x10, x8 |
| ; CHECK-NEXT: ubfiz x10, x15, #14, #1 |
| ; CHECK-NEXT: ubfiz x14, x26, #24, #1 |
| ; CHECK-NEXT: fmov w6, s1 |
| ; CHECK-NEXT: fmov w29, s21 |
| ; CHECK-NEXT: orr x8, x8, x13 |
| ; CHECK-NEXT: ubfiz x11, x11, #25, #1 |
| ; CHECK-NEXT: fmov w7, s2 |
| ; CHECK-NEXT: fmov w30, s22 |
| ; CHECK-NEXT: ubfiz x13, x18, #15, #1 |
| ; CHECK-NEXT: orr x9, x9, x10 |
| ; CHECK-NEXT: orr x8, x8, x14 |
| ; CHECK-NEXT: ubfiz x10, x27, #26, #1 |
| ; CHECK-NEXT: fmov w12, s5 |
| ; CHECK-NEXT: orr x8, x8, x11 |
| ; CHECK-NEXT: ubfiz x11, x28, #27, #1 |
| ; CHECK-NEXT: mov z3.b, z0.b[31] |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: ubfiz x10, x6, #16, #1 |
| ; CHECK-NEXT: ubfiz x13, x29, #28, #1 |
| ; CHECK-NEXT: orr x8, x8, x11 |
| ; CHECK-NEXT: ubfiz x11, x7, #17, #1 |
| ; CHECK-NEXT: ubfiz x14, x30, #29, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[32] |
| ; CHECK-NEXT: orr x9, x9, x10 |
| ; CHECK-NEXT: orr x8, x8, x13 |
| ; CHECK-NEXT: ubfiz x10, x12, #30, #1 |
| ; CHECK-NEXT: fmov w12, s3 |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: orr x8, x8, x14 |
| ; CHECK-NEXT: mov z1.b, z0.b[33] |
| ; CHECK-NEXT: orr x9, x22, x9 |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: orr x8, x9, x8 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: lsl w10, w12, #31 |
| ; CHECK-NEXT: mov z2.b, z0.b[34] |
| ; CHECK-NEXT: orr x8, x8, x10 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[62] |
| ; CHECK-NEXT: mov z0.b, z0.b[63] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB44_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB44_3 |
| ; CHECK-NEXT: b .LBB44_4 |
| ; CHECK-NEXT: .LBB44_2: |
| ; CHECK-NEXT: adrp x9, .LCPI44_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI44_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB44_4 |
| ; CHECK-NEXT: .LBB44_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB44_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB44_68 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB44_69 |
| ; CHECK-NEXT: .LBB44_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB44_70 |
| ; CHECK-NEXT: .LBB44_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB44_71 |
| ; CHECK-NEXT: .LBB44_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB44_72 |
| ; CHECK-NEXT: .LBB44_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB44_73 |
| ; CHECK-NEXT: .LBB44_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB44_74 |
| ; CHECK-NEXT: .LBB44_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB44_75 |
| ; CHECK-NEXT: .LBB44_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB44_76 |
| ; CHECK-NEXT: .LBB44_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB44_77 |
| ; CHECK-NEXT: .LBB44_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB44_78 |
| ; CHECK-NEXT: .LBB44_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB44_79 |
| ; CHECK-NEXT: .LBB44_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB44_80 |
| ; CHECK-NEXT: .LBB44_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB44_81 |
| ; CHECK-NEXT: .LBB44_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB44_82 |
| ; CHECK-NEXT: .LBB44_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB44_83 |
| ; CHECK-NEXT: .LBB44_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB44_84 |
| ; CHECK-NEXT: .LBB44_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB44_85 |
| ; CHECK-NEXT: .LBB44_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB44_86 |
| ; CHECK-NEXT: .LBB44_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB44_87 |
| ; CHECK-NEXT: .LBB44_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB44_88 |
| ; CHECK-NEXT: .LBB44_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB44_89 |
| ; CHECK-NEXT: .LBB44_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB44_90 |
| ; CHECK-NEXT: .LBB44_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB44_91 |
| ; CHECK-NEXT: .LBB44_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB44_92 |
| ; CHECK-NEXT: .LBB44_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB44_93 |
| ; CHECK-NEXT: .LBB44_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB44_94 |
| ; CHECK-NEXT: .LBB44_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB44_95 |
| ; CHECK-NEXT: .LBB44_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB44_96 |
| ; CHECK-NEXT: .LBB44_33: // %else118 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB44_97 |
| ; CHECK-NEXT: .LBB44_34: // %else122 |
| ; CHECK-NEXT: tbnz x8, #32, .LBB44_98 |
| ; CHECK-NEXT: .LBB44_35: // %else126 |
| ; CHECK-NEXT: tbnz x8, #33, .LBB44_99 |
| ; CHECK-NEXT: .LBB44_36: // %else130 |
| ; CHECK-NEXT: tbnz x8, #34, .LBB44_100 |
| ; CHECK-NEXT: .LBB44_37: // %else134 |
| ; CHECK-NEXT: tbnz x8, #35, .LBB44_101 |
| ; CHECK-NEXT: .LBB44_38: // %else138 |
| ; CHECK-NEXT: tbnz x8, #36, .LBB44_102 |
| ; CHECK-NEXT: .LBB44_39: // %else142 |
| ; CHECK-NEXT: tbnz x8, #37, .LBB44_103 |
| ; CHECK-NEXT: .LBB44_40: // %else146 |
| ; CHECK-NEXT: tbnz x8, #38, .LBB44_104 |
| ; CHECK-NEXT: .LBB44_41: // %else150 |
| ; CHECK-NEXT: tbnz x8, #39, .LBB44_105 |
| ; CHECK-NEXT: .LBB44_42: // %else154 |
| ; CHECK-NEXT: tbnz x8, #40, .LBB44_106 |
| ; CHECK-NEXT: .LBB44_43: // %else158 |
| ; CHECK-NEXT: tbnz x8, #41, .LBB44_107 |
| ; CHECK-NEXT: .LBB44_44: // %else162 |
| ; CHECK-NEXT: tbnz x8, #42, .LBB44_108 |
| ; CHECK-NEXT: .LBB44_45: // %else166 |
| ; CHECK-NEXT: tbnz x8, #43, .LBB44_109 |
| ; CHECK-NEXT: .LBB44_46: // %else170 |
| ; CHECK-NEXT: tbnz x8, #44, .LBB44_110 |
| ; CHECK-NEXT: .LBB44_47: // %else174 |
| ; CHECK-NEXT: tbnz x8, #45, .LBB44_111 |
| ; CHECK-NEXT: .LBB44_48: // %else178 |
| ; CHECK-NEXT: tbnz x8, #46, .LBB44_112 |
| ; CHECK-NEXT: .LBB44_49: // %else182 |
| ; CHECK-NEXT: tbnz x8, #47, .LBB44_113 |
| ; CHECK-NEXT: .LBB44_50: // %else186 |
| ; CHECK-NEXT: tbnz x8, #48, .LBB44_114 |
| ; CHECK-NEXT: .LBB44_51: // %else190 |
| ; CHECK-NEXT: tbnz x8, #49, .LBB44_115 |
| ; CHECK-NEXT: .LBB44_52: // %else194 |
| ; CHECK-NEXT: tbnz x8, #50, .LBB44_116 |
| ; CHECK-NEXT: .LBB44_53: // %else198 |
| ; CHECK-NEXT: tbnz x8, #51, .LBB44_117 |
| ; CHECK-NEXT: .LBB44_54: // %else202 |
| ; CHECK-NEXT: tbnz x8, #52, .LBB44_118 |
| ; CHECK-NEXT: .LBB44_55: // %else206 |
| ; CHECK-NEXT: tbnz x8, #53, .LBB44_119 |
| ; CHECK-NEXT: .LBB44_56: // %else210 |
| ; CHECK-NEXT: tbnz x8, #54, .LBB44_120 |
| ; CHECK-NEXT: .LBB44_57: // %else214 |
| ; CHECK-NEXT: tbnz x8, #55, .LBB44_121 |
| ; CHECK-NEXT: .LBB44_58: // %else218 |
| ; CHECK-NEXT: tbnz x8, #56, .LBB44_122 |
| ; CHECK-NEXT: .LBB44_59: // %else222 |
| ; CHECK-NEXT: tbnz x8, #57, .LBB44_123 |
| ; CHECK-NEXT: .LBB44_60: // %else226 |
| ; CHECK-NEXT: tbnz x8, #58, .LBB44_124 |
| ; CHECK-NEXT: .LBB44_61: // %else230 |
| ; CHECK-NEXT: tbnz x8, #59, .LBB44_125 |
| ; CHECK-NEXT: .LBB44_62: // %else234 |
| ; CHECK-NEXT: tbnz x8, #60, .LBB44_126 |
| ; CHECK-NEXT: .LBB44_63: // %else238 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB44_127 |
| ; CHECK-NEXT: .LBB44_64: // %else242 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB44_128 |
| ; CHECK-NEXT: .LBB44_65: // %else246 |
| ; CHECK-NEXT: tbz x8, #63, .LBB44_67 |
| ; CHECK-NEXT: .LBB44_66: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB44_67: // %else250 |
| ; CHECK-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.s, vl64 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB44_68: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB44_6 |
| ; CHECK-NEXT: .LBB44_69: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB44_7 |
| ; CHECK-NEXT: .LBB44_70: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB44_8 |
| ; CHECK-NEXT: .LBB44_71: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB44_9 |
| ; CHECK-NEXT: .LBB44_72: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB44_10 |
| ; CHECK-NEXT: .LBB44_73: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB44_11 |
| ; CHECK-NEXT: .LBB44_74: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB44_12 |
| ; CHECK-NEXT: .LBB44_75: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB44_13 |
| ; CHECK-NEXT: .LBB44_76: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB44_14 |
| ; CHECK-NEXT: .LBB44_77: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB44_15 |
| ; CHECK-NEXT: .LBB44_78: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB44_16 |
| ; CHECK-NEXT: .LBB44_79: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB44_17 |
| ; CHECK-NEXT: .LBB44_80: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB44_18 |
| ; CHECK-NEXT: .LBB44_81: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB44_19 |
| ; CHECK-NEXT: .LBB44_82: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB44_20 |
| ; CHECK-NEXT: .LBB44_83: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB44_21 |
| ; CHECK-NEXT: .LBB44_84: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB44_22 |
| ; CHECK-NEXT: .LBB44_85: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB44_23 |
| ; CHECK-NEXT: .LBB44_86: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB44_24 |
| ; CHECK-NEXT: .LBB44_87: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB44_25 |
| ; CHECK-NEXT: .LBB44_88: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB44_26 |
| ; CHECK-NEXT: .LBB44_89: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB44_27 |
| ; CHECK-NEXT: .LBB44_90: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB44_28 |
| ; CHECK-NEXT: .LBB44_91: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB44_29 |
| ; CHECK-NEXT: .LBB44_92: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB44_30 |
| ; CHECK-NEXT: .LBB44_93: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB44_31 |
| ; CHECK-NEXT: .LBB44_94: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB44_32 |
| ; CHECK-NEXT: .LBB44_95: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB44_33 |
| ; CHECK-NEXT: .LBB44_96: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #31, .LBB44_34 |
| ; CHECK-NEXT: .LBB44_97: // %cond.load121 |
| ; CHECK-NEXT: mov w9, #31 // =0x1f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #32, .LBB44_35 |
| ; CHECK-NEXT: .LBB44_98: // %cond.load125 |
| ; CHECK-NEXT: mov w9, #32 // =0x20 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #33, .LBB44_36 |
| ; CHECK-NEXT: .LBB44_99: // %cond.load129 |
| ; CHECK-NEXT: mov w9, #33 // =0x21 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #34, .LBB44_37 |
| ; CHECK-NEXT: .LBB44_100: // %cond.load133 |
| ; CHECK-NEXT: mov w9, #34 // =0x22 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #35, .LBB44_38 |
| ; CHECK-NEXT: .LBB44_101: // %cond.load137 |
| ; CHECK-NEXT: mov w9, #35 // =0x23 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #36, .LBB44_39 |
| ; CHECK-NEXT: .LBB44_102: // %cond.load141 |
| ; CHECK-NEXT: mov w9, #36 // =0x24 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #37, .LBB44_40 |
| ; CHECK-NEXT: .LBB44_103: // %cond.load145 |
| ; CHECK-NEXT: mov w9, #37 // =0x25 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #38, .LBB44_41 |
| ; CHECK-NEXT: .LBB44_104: // %cond.load149 |
| ; CHECK-NEXT: mov w9, #38 // =0x26 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #39, .LBB44_42 |
| ; CHECK-NEXT: .LBB44_105: // %cond.load153 |
| ; CHECK-NEXT: mov w9, #39 // =0x27 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #40, .LBB44_43 |
| ; CHECK-NEXT: .LBB44_106: // %cond.load157 |
| ; CHECK-NEXT: mov w9, #40 // =0x28 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #41, .LBB44_44 |
| ; CHECK-NEXT: .LBB44_107: // %cond.load161 |
| ; CHECK-NEXT: mov w9, #41 // =0x29 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #42, .LBB44_45 |
| ; CHECK-NEXT: .LBB44_108: // %cond.load165 |
| ; CHECK-NEXT: mov w9, #42 // =0x2a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #43, .LBB44_46 |
| ; CHECK-NEXT: .LBB44_109: // %cond.load169 |
| ; CHECK-NEXT: mov w9, #43 // =0x2b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #44, .LBB44_47 |
| ; CHECK-NEXT: .LBB44_110: // %cond.load173 |
| ; CHECK-NEXT: mov w9, #44 // =0x2c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #45, .LBB44_48 |
| ; CHECK-NEXT: .LBB44_111: // %cond.load177 |
| ; CHECK-NEXT: mov w9, #45 // =0x2d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #46, .LBB44_49 |
| ; CHECK-NEXT: .LBB44_112: // %cond.load181 |
| ; CHECK-NEXT: mov w9, #46 // =0x2e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #47, .LBB44_50 |
| ; CHECK-NEXT: .LBB44_113: // %cond.load185 |
| ; CHECK-NEXT: mov w9, #47 // =0x2f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #48, .LBB44_51 |
| ; CHECK-NEXT: .LBB44_114: // %cond.load189 |
| ; CHECK-NEXT: mov w9, #48 // =0x30 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #49, .LBB44_52 |
| ; CHECK-NEXT: .LBB44_115: // %cond.load193 |
| ; CHECK-NEXT: mov w9, #49 // =0x31 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #50, .LBB44_53 |
| ; CHECK-NEXT: .LBB44_116: // %cond.load197 |
| ; CHECK-NEXT: mov w9, #50 // =0x32 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #51, .LBB44_54 |
| ; CHECK-NEXT: .LBB44_117: // %cond.load201 |
| ; CHECK-NEXT: mov w9, #51 // =0x33 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #52, .LBB44_55 |
| ; CHECK-NEXT: .LBB44_118: // %cond.load205 |
| ; CHECK-NEXT: mov w9, #52 // =0x34 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #53, .LBB44_56 |
| ; CHECK-NEXT: .LBB44_119: // %cond.load209 |
| ; CHECK-NEXT: mov w9, #53 // =0x35 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #54, .LBB44_57 |
| ; CHECK-NEXT: .LBB44_120: // %cond.load213 |
| ; CHECK-NEXT: mov w9, #54 // =0x36 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #55, .LBB44_58 |
| ; CHECK-NEXT: .LBB44_121: // %cond.load217 |
| ; CHECK-NEXT: mov w9, #55 // =0x37 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #56, .LBB44_59 |
| ; CHECK-NEXT: .LBB44_122: // %cond.load221 |
| ; CHECK-NEXT: mov w9, #56 // =0x38 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #57, .LBB44_60 |
| ; CHECK-NEXT: .LBB44_123: // %cond.load225 |
| ; CHECK-NEXT: mov w9, #57 // =0x39 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #58, .LBB44_61 |
| ; CHECK-NEXT: .LBB44_124: // %cond.load229 |
| ; CHECK-NEXT: mov w9, #58 // =0x3a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #59, .LBB44_62 |
| ; CHECK-NEXT: .LBB44_125: // %cond.load233 |
| ; CHECK-NEXT: mov w9, #59 // =0x3b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #60, .LBB44_63 |
| ; CHECK-NEXT: .LBB44_126: // %cond.load237 |
| ; CHECK-NEXT: mov w9, #60 // =0x3c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #61, .LBB44_64 |
| ; CHECK-NEXT: .LBB44_127: // %cond.load241 |
| ; CHECK-NEXT: mov w9, #61 // =0x3d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #62, .LBB44_65 |
| ; CHECK-NEXT: .LBB44_128: // %cond.load245 |
| ; CHECK-NEXT: mov w9, #62 // =0x3e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB44_66 |
| ; CHECK-NEXT: b .LBB44_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v64i8i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl64 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <64 x i8>, ptr %bp |
| %mask = icmp eq <64 x i8> %b, zeroinitializer |
| %load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison) |
| %ext = zext <64 x i8> %load to <64 x i32> |
| store <64 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v32i8i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v32i8i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.b, vl32 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0 |
| ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.b |
| ; CHECK-NEXT: umov w13, v0.b[1] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w4, v0.b[7] |
| ; CHECK-NEXT: umov w5, v0.b[8] |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: umov w3, v0.b[9] |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: umov w1, v0.b[10] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: bfi w6, w13, #1, #1 |
| ; CHECK-NEXT: umov w18, v0.b[11] |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: ubfiz w13, w4, #7, #1 |
| ; CHECK-NEXT: ubfiz w4, w5, #8, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: bfi w6, w12, #2, #1 |
| ; CHECK-NEXT: umov w16, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: orr w12, w13, w4 |
| ; CHECK-NEXT: ubfiz w13, w3, #9, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w17, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: ubfiz w11, w1, #10, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: ubfiz w3, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w4, w21, #19, #1 |
| ; CHECK-NEXT: orr w12, w12, w13 |
| ; CHECK-NEXT: ubfiz w13, w18, #11, #1 |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: ubfiz w1, w22, #20, #1 |
| ; CHECK-NEXT: orr w11, w12, w11 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w3, w3, w4 |
| ; CHECK-NEXT: orr w11, w11, w13 |
| ; CHECK-NEXT: ubfiz w12, w16, #12, #1 |
| ; CHECK-NEXT: ubfiz w13, w23, #21, #1 |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: orr w10, w3, w1 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: ubfiz w9, w17, #13, #1 |
| ; CHECK-NEXT: ubfiz w16, w24, #22, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w5, s21 |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: ubfiz w12, w25, #23, #1 |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: orr w9, w11, w9 |
| ; CHECK-NEXT: orr w10, w10, w16 |
| ; CHECK-NEXT: ubfiz w11, w26, #24, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: fmov w28, s22 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w14, #14, #1 |
| ; CHECK-NEXT: ubfiz w13, w27, #25, #1 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w29, s23 |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: ubfiz w14, w5, #26, #1 |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: fmov w30, s24 |
| ; CHECK-NEXT: ubfiz w11, w15, #15, #1 |
| ; CHECK-NEXT: bfi w6, w8, #6, #1 |
| ; CHECK-NEXT: orr w8, w9, w12 |
| ; CHECK-NEXT: orr w9, w10, w13 |
| ; CHECK-NEXT: orr w9, w9, w14 |
| ; CHECK-NEXT: ubfiz w10, w28, #27, #1 |
| ; CHECK-NEXT: fmov w14, s2 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: ubfiz w11, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w13, w29, #28, #1 |
| ; CHECK-NEXT: ubfiz w12, w19, #17, #1 |
| ; CHECK-NEXT: orr w9, w9, w10 |
| ; CHECK-NEXT: ubfiz w10, w30, #29, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w9, w9, w13 |
| ; CHECK-NEXT: ubfiz w11, w14, #30, #1 |
| ; CHECK-NEXT: orr w8, w8, w12 |
| ; CHECK-NEXT: orr w9, w9, w10 |
| ; CHECK-NEXT: orr w8, w6, w8 |
| ; CHECK-NEXT: orr w9, w9, w11 |
| ; CHECK-NEXT: orr w8, w8, w9 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB45_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #1 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB45_3 |
| ; CHECK-NEXT: b .LBB45_4 |
| ; CHECK-NEXT: .LBB45_2: |
| ; CHECK-NEXT: adrp x9, .LCPI45_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI45_0 |
| ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB45_4 |
| ; CHECK-NEXT: .LBB45_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: .LBB45_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB45_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB45_37 |
| ; CHECK-NEXT: .LBB45_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB45_38 |
| ; CHECK-NEXT: .LBB45_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB45_39 |
| ; CHECK-NEXT: .LBB45_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB45_40 |
| ; CHECK-NEXT: .LBB45_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB45_41 |
| ; CHECK-NEXT: .LBB45_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB45_42 |
| ; CHECK-NEXT: .LBB45_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB45_43 |
| ; CHECK-NEXT: .LBB45_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB45_44 |
| ; CHECK-NEXT: .LBB45_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB45_45 |
| ; CHECK-NEXT: .LBB45_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB45_46 |
| ; CHECK-NEXT: .LBB45_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB45_47 |
| ; CHECK-NEXT: .LBB45_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB45_48 |
| ; CHECK-NEXT: .LBB45_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB45_49 |
| ; CHECK-NEXT: .LBB45_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB45_50 |
| ; CHECK-NEXT: .LBB45_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB45_51 |
| ; CHECK-NEXT: .LBB45_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB45_52 |
| ; CHECK-NEXT: .LBB45_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB45_53 |
| ; CHECK-NEXT: .LBB45_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB45_54 |
| ; CHECK-NEXT: .LBB45_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB45_55 |
| ; CHECK-NEXT: .LBB45_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB45_56 |
| ; CHECK-NEXT: .LBB45_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB45_57 |
| ; CHECK-NEXT: .LBB45_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB45_58 |
| ; CHECK-NEXT: .LBB45_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB45_59 |
| ; CHECK-NEXT: .LBB45_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB45_60 |
| ; CHECK-NEXT: .LBB45_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB45_61 |
| ; CHECK-NEXT: .LBB45_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB45_62 |
| ; CHECK-NEXT: .LBB45_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB45_63 |
| ; CHECK-NEXT: .LBB45_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB45_64 |
| ; CHECK-NEXT: .LBB45_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB45_35 |
| ; CHECK-NEXT: .LBB45_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w8 |
| ; CHECK-NEXT: ldrb w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w8 |
| ; CHECK-NEXT: .LBB45_35: // %else122 |
| ; CHECK-NEXT: uunpklo z0.h, z0.b |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB45_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB45_6 |
| ; CHECK-NEXT: .LBB45_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB45_7 |
| ; CHECK-NEXT: .LBB45_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB45_8 |
| ; CHECK-NEXT: .LBB45_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB45_9 |
| ; CHECK-NEXT: .LBB45_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB45_10 |
| ; CHECK-NEXT: .LBB45_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB45_11 |
| ; CHECK-NEXT: .LBB45_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB45_12 |
| ; CHECK-NEXT: .LBB45_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB45_13 |
| ; CHECK-NEXT: .LBB45_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB45_14 |
| ; CHECK-NEXT: .LBB45_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB45_15 |
| ; CHECK-NEXT: .LBB45_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB45_16 |
| ; CHECK-NEXT: .LBB45_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB45_17 |
| ; CHECK-NEXT: .LBB45_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB45_18 |
| ; CHECK-NEXT: .LBB45_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB45_19 |
| ; CHECK-NEXT: .LBB45_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB45_20 |
| ; CHECK-NEXT: .LBB45_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB45_21 |
| ; CHECK-NEXT: .LBB45_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB45_22 |
| ; CHECK-NEXT: .LBB45_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB45_23 |
| ; CHECK-NEXT: .LBB45_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB45_24 |
| ; CHECK-NEXT: .LBB45_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB45_25 |
| ; CHECK-NEXT: .LBB45_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB45_26 |
| ; CHECK-NEXT: .LBB45_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB45_27 |
| ; CHECK-NEXT: .LBB45_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB45_28 |
| ; CHECK-NEXT: .LBB45_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB45_29 |
| ; CHECK-NEXT: .LBB45_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB45_30 |
| ; CHECK-NEXT: .LBB45_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB45_31 |
| ; CHECK-NEXT: .LBB45_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB45_32 |
| ; CHECK-NEXT: .LBB45_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB45_33 |
| ; CHECK-NEXT: .LBB45_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.b, #0, #1 |
| ; CHECK-NEXT: mov z2.b, w9 |
| ; CHECK-NEXT: ldrb w9, [x0], #1 |
| ; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b |
| ; CHECK-NEXT: mov z0.b, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB45_34 |
| ; CHECK-NEXT: b .LBB45_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1b { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i8>, ptr %bp |
| %mask = icmp eq <32 x i8> %b, zeroinitializer |
| %load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison) |
| %ext = zext <32 x i8> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v64i16i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v64i16i32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.h, vl64 |
| ; CHECK-NEXT: str x2, [sp] // 8-byte Spill |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w12, v0.b[1] |
| ; CHECK-NEXT: fmov w25, s0 |
| ; CHECK-NEXT: mov z3.b, z0.b[18] |
| ; CHECK-NEXT: mov z4.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[2] |
| ; CHECK-NEXT: umov w14, v0.b[7] |
| ; CHECK-NEXT: umov w3, v0.b[8] |
| ; CHECK-NEXT: mov z5.b, z0.b[20] |
| ; CHECK-NEXT: umov w4, v0.b[9] |
| ; CHECK-NEXT: mov z6.b, z0.b[21] |
| ; CHECK-NEXT: and x25, x25, #0x1 |
| ; CHECK-NEXT: umov w5, v0.b[10] |
| ; CHECK-NEXT: mov z7.b, z0.b[22] |
| ; CHECK-NEXT: fmov w19, s3 |
| ; CHECK-NEXT: fmov w20, s4 |
| ; CHECK-NEXT: bfi x25, x12, #1, #1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: mov z16.b, z0.b[23] |
| ; CHECK-NEXT: fmov w21, s5 |
| ; CHECK-NEXT: umov w15, v0.b[11] |
| ; CHECK-NEXT: fmov w22, s6 |
| ; CHECK-NEXT: bfi x25, x13, #2, #1 |
| ; CHECK-NEXT: ubfiz x13, x14, #7, #1 |
| ; CHECK-NEXT: ubfiz x14, x3, #8, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: umov w17, v0.b[12] |
| ; CHECK-NEXT: mov z17.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s7 |
| ; CHECK-NEXT: ubfiz x3, x4, #9, #1 |
| ; CHECK-NEXT: ubfiz x4, x19, #18, #1 |
| ; CHECK-NEXT: ubfiz x19, x20, #19, #1 |
| ; CHECK-NEXT: umov w18, v0.b[13] |
| ; CHECK-NEXT: mov z18.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s16 |
| ; CHECK-NEXT: orr x13, x13, x14 |
| ; CHECK-NEXT: ubfiz x14, x5, #10, #1 |
| ; CHECK-NEXT: ubfiz x5, x21, #20, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w16, v0.b[14] |
| ; CHECK-NEXT: mov z19.b, z0.b[26] |
| ; CHECK-NEXT: orr x4, x4, x19 |
| ; CHECK-NEXT: orr x13, x13, x3 |
| ; CHECK-NEXT: ubfiz x3, x22, #21, #1 |
| ; CHECK-NEXT: bfi x25, x11, #3, #1 |
| ; CHECK-NEXT: mov z20.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s17 |
| ; CHECK-NEXT: orr x11, x13, x14 |
| ; CHECK-NEXT: orr x13, x4, x5 |
| ; CHECK-NEXT: ubfiz x14, x15, #11, #1 |
| ; CHECK-NEXT: ubfiz x15, x23, #22, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[16] |
| ; CHECK-NEXT: mov z21.b, z0.b[28] |
| ; CHECK-NEXT: fmov w27, s18 |
| ; CHECK-NEXT: orr x13, x13, x3 |
| ; CHECK-NEXT: bfi x25, x10, #4, #1 |
| ; CHECK-NEXT: ubfiz x10, x17, #12, #1 |
| ; CHECK-NEXT: ubfiz x17, x24, #23, #1 |
| ; CHECK-NEXT: umov w1, v0.b[15] |
| ; CHECK-NEXT: mov z2.b, z0.b[17] |
| ; CHECK-NEXT: mov z4.b, z0.b[29] |
| ; CHECK-NEXT: fmov w28, s19 |
| ; CHECK-NEXT: orr x11, x11, x14 |
| ; CHECK-NEXT: orr x13, x13, x15 |
| ; CHECK-NEXT: ubfiz x14, x18, #13, #1 |
| ; CHECK-NEXT: mov z5.b, z0.b[30] |
| ; CHECK-NEXT: fmov w29, s20 |
| ; CHECK-NEXT: orr x10, x11, x10 |
| ; CHECK-NEXT: bfi x25, x9, #5, #1 |
| ; CHECK-NEXT: orr x9, x13, x17 |
| ; CHECK-NEXT: ubfiz x11, x16, #14, #1 |
| ; CHECK-NEXT: ubfiz x13, x26, #24, #1 |
| ; CHECK-NEXT: fmov w6, s1 |
| ; CHECK-NEXT: fmov w12, s21 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: ubfiz x15, x27, #25, #1 |
| ; CHECK-NEXT: umov w2, v0.b[6] |
| ; CHECK-NEXT: fmov w7, s2 |
| ; CHECK-NEXT: fmov w30, s4 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x11, x28, #26, #1 |
| ; CHECK-NEXT: fmov w8, s5 |
| ; CHECK-NEXT: ubfiz x14, x1, #15, #1 |
| ; CHECK-NEXT: orr x9, x9, x15 |
| ; CHECK-NEXT: ubfiz x13, x29, #27, #1 |
| ; CHECK-NEXT: mov z3.b, z0.b[31] |
| ; CHECK-NEXT: orr x9, x9, x11 |
| ; CHECK-NEXT: ubfiz x11, x6, #16, #1 |
| ; CHECK-NEXT: ubfiz x12, x12, #28, #1 |
| ; CHECK-NEXT: orr x10, x10, x14 |
| ; CHECK-NEXT: orr x9, x9, x13 |
| ; CHECK-NEXT: ubfiz x13, x7, #17, #1 |
| ; CHECK-NEXT: ubfiz x14, x30, #29, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[32] |
| ; CHECK-NEXT: bfi x25, x2, #6, #1 |
| ; CHECK-NEXT: orr x10, x10, x11 |
| ; CHECK-NEXT: orr x9, x9, x12 |
| ; CHECK-NEXT: ubfiz x8, x8, #30, #1 |
| ; CHECK-NEXT: fmov w11, s3 |
| ; CHECK-NEXT: orr x10, x10, x13 |
| ; CHECK-NEXT: orr x9, x9, x14 |
| ; CHECK-NEXT: mov z1.b, z0.b[33] |
| ; CHECK-NEXT: orr x10, x25, x10 |
| ; CHECK-NEXT: orr x8, x9, x8 |
| ; CHECK-NEXT: orr x8, x10, x8 |
| ; CHECK-NEXT: fmov w10, s2 |
| ; CHECK-NEXT: lsl w9, w11, #31 |
| ; CHECK-NEXT: mov z2.b, z0.b[34] |
| ; CHECK-NEXT: orr x8, x8, x9 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #32 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[35] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #33 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[36] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #34 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[37] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #35 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[38] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #36 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[39] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #37 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[40] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #38 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[41] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #39 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[42] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #40 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[43] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #41 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[44] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #42 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[45] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #43 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[46] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #44 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[47] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #45 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[48] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #46 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[49] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #47 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[50] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #48 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[51] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #49 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[52] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #50 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[53] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #51 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[54] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #52 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[55] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #53 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[56] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #54 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[57] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #55 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[58] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #56 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[59] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #57 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[60] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #58 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: mov z1.b, z0.b[61] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #59 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: mov z2.b, z0.b[62] |
| ; CHECK-NEXT: mov z0.b, z0.b[63] |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #60 |
| ; CHECK-NEXT: and w9, w10, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #61 |
| ; CHECK-NEXT: fmov w9, s2 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #62 |
| ; CHECK-NEXT: fmov w9, s0 |
| ; CHECK-NEXT: orr x8, x8, x9, lsl #63 |
| ; CHECK-NEXT: tbz w8, #0, .LBB46_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #2 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB46_3 |
| ; CHECK-NEXT: b .LBB46_4 |
| ; CHECK-NEXT: .LBB46_2: |
| ; CHECK-NEXT: adrp x9, .LCPI46_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI46_0 |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB46_4 |
| ; CHECK-NEXT: .LBB46_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: .LBB46_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB46_68 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB46_69 |
| ; CHECK-NEXT: .LBB46_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB46_70 |
| ; CHECK-NEXT: .LBB46_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB46_71 |
| ; CHECK-NEXT: .LBB46_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB46_72 |
| ; CHECK-NEXT: .LBB46_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB46_73 |
| ; CHECK-NEXT: .LBB46_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB46_74 |
| ; CHECK-NEXT: .LBB46_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB46_75 |
| ; CHECK-NEXT: .LBB46_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB46_76 |
| ; CHECK-NEXT: .LBB46_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB46_77 |
| ; CHECK-NEXT: .LBB46_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB46_78 |
| ; CHECK-NEXT: .LBB46_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB46_79 |
| ; CHECK-NEXT: .LBB46_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB46_80 |
| ; CHECK-NEXT: .LBB46_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB46_81 |
| ; CHECK-NEXT: .LBB46_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB46_82 |
| ; CHECK-NEXT: .LBB46_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB46_83 |
| ; CHECK-NEXT: .LBB46_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB46_84 |
| ; CHECK-NEXT: .LBB46_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB46_85 |
| ; CHECK-NEXT: .LBB46_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB46_86 |
| ; CHECK-NEXT: .LBB46_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB46_87 |
| ; CHECK-NEXT: .LBB46_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB46_88 |
| ; CHECK-NEXT: .LBB46_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB46_89 |
| ; CHECK-NEXT: .LBB46_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB46_90 |
| ; CHECK-NEXT: .LBB46_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB46_91 |
| ; CHECK-NEXT: .LBB46_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB46_92 |
| ; CHECK-NEXT: .LBB46_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB46_93 |
| ; CHECK-NEXT: .LBB46_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB46_94 |
| ; CHECK-NEXT: .LBB46_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB46_95 |
| ; CHECK-NEXT: .LBB46_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB46_96 |
| ; CHECK-NEXT: .LBB46_33: // %else118 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB46_97 |
| ; CHECK-NEXT: .LBB46_34: // %else122 |
| ; CHECK-NEXT: tbnz x8, #32, .LBB46_98 |
| ; CHECK-NEXT: .LBB46_35: // %else126 |
| ; CHECK-NEXT: tbnz x8, #33, .LBB46_99 |
| ; CHECK-NEXT: .LBB46_36: // %else130 |
| ; CHECK-NEXT: tbnz x8, #34, .LBB46_100 |
| ; CHECK-NEXT: .LBB46_37: // %else134 |
| ; CHECK-NEXT: tbnz x8, #35, .LBB46_101 |
| ; CHECK-NEXT: .LBB46_38: // %else138 |
| ; CHECK-NEXT: tbnz x8, #36, .LBB46_102 |
| ; CHECK-NEXT: .LBB46_39: // %else142 |
| ; CHECK-NEXT: tbnz x8, #37, .LBB46_103 |
| ; CHECK-NEXT: .LBB46_40: // %else146 |
| ; CHECK-NEXT: tbnz x8, #38, .LBB46_104 |
| ; CHECK-NEXT: .LBB46_41: // %else150 |
| ; CHECK-NEXT: tbnz x8, #39, .LBB46_105 |
| ; CHECK-NEXT: .LBB46_42: // %else154 |
| ; CHECK-NEXT: tbnz x8, #40, .LBB46_106 |
| ; CHECK-NEXT: .LBB46_43: // %else158 |
| ; CHECK-NEXT: tbnz x8, #41, .LBB46_107 |
| ; CHECK-NEXT: .LBB46_44: // %else162 |
| ; CHECK-NEXT: tbnz x8, #42, .LBB46_108 |
| ; CHECK-NEXT: .LBB46_45: // %else166 |
| ; CHECK-NEXT: tbnz x8, #43, .LBB46_109 |
| ; CHECK-NEXT: .LBB46_46: // %else170 |
| ; CHECK-NEXT: tbnz x8, #44, .LBB46_110 |
| ; CHECK-NEXT: .LBB46_47: // %else174 |
| ; CHECK-NEXT: tbnz x8, #45, .LBB46_111 |
| ; CHECK-NEXT: .LBB46_48: // %else178 |
| ; CHECK-NEXT: tbnz x8, #46, .LBB46_112 |
| ; CHECK-NEXT: .LBB46_49: // %else182 |
| ; CHECK-NEXT: tbnz x8, #47, .LBB46_113 |
| ; CHECK-NEXT: .LBB46_50: // %else186 |
| ; CHECK-NEXT: tbnz x8, #48, .LBB46_114 |
| ; CHECK-NEXT: .LBB46_51: // %else190 |
| ; CHECK-NEXT: tbnz x8, #49, .LBB46_115 |
| ; CHECK-NEXT: .LBB46_52: // %else194 |
| ; CHECK-NEXT: tbnz x8, #50, .LBB46_116 |
| ; CHECK-NEXT: .LBB46_53: // %else198 |
| ; CHECK-NEXT: tbnz x8, #51, .LBB46_117 |
| ; CHECK-NEXT: .LBB46_54: // %else202 |
| ; CHECK-NEXT: tbnz x8, #52, .LBB46_118 |
| ; CHECK-NEXT: .LBB46_55: // %else206 |
| ; CHECK-NEXT: tbnz x8, #53, .LBB46_119 |
| ; CHECK-NEXT: .LBB46_56: // %else210 |
| ; CHECK-NEXT: tbnz x8, #54, .LBB46_120 |
| ; CHECK-NEXT: .LBB46_57: // %else214 |
| ; CHECK-NEXT: tbnz x8, #55, .LBB46_121 |
| ; CHECK-NEXT: .LBB46_58: // %else218 |
| ; CHECK-NEXT: tbnz x8, #56, .LBB46_122 |
| ; CHECK-NEXT: .LBB46_59: // %else222 |
| ; CHECK-NEXT: tbnz x8, #57, .LBB46_123 |
| ; CHECK-NEXT: .LBB46_60: // %else226 |
| ; CHECK-NEXT: tbnz x8, #58, .LBB46_124 |
| ; CHECK-NEXT: .LBB46_61: // %else230 |
| ; CHECK-NEXT: tbnz x8, #59, .LBB46_125 |
| ; CHECK-NEXT: .LBB46_62: // %else234 |
| ; CHECK-NEXT: tbnz x8, #60, .LBB46_126 |
| ; CHECK-NEXT: .LBB46_63: // %else238 |
| ; CHECK-NEXT: tbnz x8, #61, .LBB46_127 |
| ; CHECK-NEXT: .LBB46_64: // %else242 |
| ; CHECK-NEXT: tbnz x8, #62, .LBB46_128 |
| ; CHECK-NEXT: .LBB46_65: // %else246 |
| ; CHECK-NEXT: tbz x8, #63, .LBB46_67 |
| ; CHECK-NEXT: .LBB46_66: // %cond.load249 |
| ; CHECK-NEXT: mov w8, #63 // =0x3f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w8 |
| ; CHECK-NEXT: ldrh w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w8 |
| ; CHECK-NEXT: .LBB46_67: // %else250 |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.s, vl64 |
| ; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x8] |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB46_68: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB46_6 |
| ; CHECK-NEXT: .LBB46_69: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB46_7 |
| ; CHECK-NEXT: .LBB46_70: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB46_8 |
| ; CHECK-NEXT: .LBB46_71: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB46_9 |
| ; CHECK-NEXT: .LBB46_72: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB46_10 |
| ; CHECK-NEXT: .LBB46_73: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB46_11 |
| ; CHECK-NEXT: .LBB46_74: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB46_12 |
| ; CHECK-NEXT: .LBB46_75: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB46_13 |
| ; CHECK-NEXT: .LBB46_76: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB46_14 |
| ; CHECK-NEXT: .LBB46_77: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB46_15 |
| ; CHECK-NEXT: .LBB46_78: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB46_16 |
| ; CHECK-NEXT: .LBB46_79: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB46_17 |
| ; CHECK-NEXT: .LBB46_80: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB46_18 |
| ; CHECK-NEXT: .LBB46_81: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB46_19 |
| ; CHECK-NEXT: .LBB46_82: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB46_20 |
| ; CHECK-NEXT: .LBB46_83: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB46_21 |
| ; CHECK-NEXT: .LBB46_84: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB46_22 |
| ; CHECK-NEXT: .LBB46_85: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB46_23 |
| ; CHECK-NEXT: .LBB46_86: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB46_24 |
| ; CHECK-NEXT: .LBB46_87: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB46_25 |
| ; CHECK-NEXT: .LBB46_88: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB46_26 |
| ; CHECK-NEXT: .LBB46_89: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB46_27 |
| ; CHECK-NEXT: .LBB46_90: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB46_28 |
| ; CHECK-NEXT: .LBB46_91: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB46_29 |
| ; CHECK-NEXT: .LBB46_92: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB46_30 |
| ; CHECK-NEXT: .LBB46_93: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB46_31 |
| ; CHECK-NEXT: .LBB46_94: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB46_32 |
| ; CHECK-NEXT: .LBB46_95: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB46_33 |
| ; CHECK-NEXT: .LBB46_96: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #31, .LBB46_34 |
| ; CHECK-NEXT: .LBB46_97: // %cond.load121 |
| ; CHECK-NEXT: mov w9, #31 // =0x1f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #32, .LBB46_35 |
| ; CHECK-NEXT: .LBB46_98: // %cond.load125 |
| ; CHECK-NEXT: mov w9, #32 // =0x20 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #33, .LBB46_36 |
| ; CHECK-NEXT: .LBB46_99: // %cond.load129 |
| ; CHECK-NEXT: mov w9, #33 // =0x21 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #34, .LBB46_37 |
| ; CHECK-NEXT: .LBB46_100: // %cond.load133 |
| ; CHECK-NEXT: mov w9, #34 // =0x22 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #35, .LBB46_38 |
| ; CHECK-NEXT: .LBB46_101: // %cond.load137 |
| ; CHECK-NEXT: mov w9, #35 // =0x23 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #36, .LBB46_39 |
| ; CHECK-NEXT: .LBB46_102: // %cond.load141 |
| ; CHECK-NEXT: mov w9, #36 // =0x24 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #37, .LBB46_40 |
| ; CHECK-NEXT: .LBB46_103: // %cond.load145 |
| ; CHECK-NEXT: mov w9, #37 // =0x25 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #38, .LBB46_41 |
| ; CHECK-NEXT: .LBB46_104: // %cond.load149 |
| ; CHECK-NEXT: mov w9, #38 // =0x26 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #39, .LBB46_42 |
| ; CHECK-NEXT: .LBB46_105: // %cond.load153 |
| ; CHECK-NEXT: mov w9, #39 // =0x27 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #40, .LBB46_43 |
| ; CHECK-NEXT: .LBB46_106: // %cond.load157 |
| ; CHECK-NEXT: mov w9, #40 // =0x28 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #41, .LBB46_44 |
| ; CHECK-NEXT: .LBB46_107: // %cond.load161 |
| ; CHECK-NEXT: mov w9, #41 // =0x29 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #42, .LBB46_45 |
| ; CHECK-NEXT: .LBB46_108: // %cond.load165 |
| ; CHECK-NEXT: mov w9, #42 // =0x2a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #43, .LBB46_46 |
| ; CHECK-NEXT: .LBB46_109: // %cond.load169 |
| ; CHECK-NEXT: mov w9, #43 // =0x2b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #44, .LBB46_47 |
| ; CHECK-NEXT: .LBB46_110: // %cond.load173 |
| ; CHECK-NEXT: mov w9, #44 // =0x2c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #45, .LBB46_48 |
| ; CHECK-NEXT: .LBB46_111: // %cond.load177 |
| ; CHECK-NEXT: mov w9, #45 // =0x2d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #46, .LBB46_49 |
| ; CHECK-NEXT: .LBB46_112: // %cond.load181 |
| ; CHECK-NEXT: mov w9, #46 // =0x2e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #47, .LBB46_50 |
| ; CHECK-NEXT: .LBB46_113: // %cond.load185 |
| ; CHECK-NEXT: mov w9, #47 // =0x2f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #48, .LBB46_51 |
| ; CHECK-NEXT: .LBB46_114: // %cond.load189 |
| ; CHECK-NEXT: mov w9, #48 // =0x30 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #49, .LBB46_52 |
| ; CHECK-NEXT: .LBB46_115: // %cond.load193 |
| ; CHECK-NEXT: mov w9, #49 // =0x31 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #50, .LBB46_53 |
| ; CHECK-NEXT: .LBB46_116: // %cond.load197 |
| ; CHECK-NEXT: mov w9, #50 // =0x32 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #51, .LBB46_54 |
| ; CHECK-NEXT: .LBB46_117: // %cond.load201 |
| ; CHECK-NEXT: mov w9, #51 // =0x33 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #52, .LBB46_55 |
| ; CHECK-NEXT: .LBB46_118: // %cond.load205 |
| ; CHECK-NEXT: mov w9, #52 // =0x34 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #53, .LBB46_56 |
| ; CHECK-NEXT: .LBB46_119: // %cond.load209 |
| ; CHECK-NEXT: mov w9, #53 // =0x35 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #54, .LBB46_57 |
| ; CHECK-NEXT: .LBB46_120: // %cond.load213 |
| ; CHECK-NEXT: mov w9, #54 // =0x36 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #55, .LBB46_58 |
| ; CHECK-NEXT: .LBB46_121: // %cond.load217 |
| ; CHECK-NEXT: mov w9, #55 // =0x37 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #56, .LBB46_59 |
| ; CHECK-NEXT: .LBB46_122: // %cond.load221 |
| ; CHECK-NEXT: mov w9, #56 // =0x38 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #57, .LBB46_60 |
| ; CHECK-NEXT: .LBB46_123: // %cond.load225 |
| ; CHECK-NEXT: mov w9, #57 // =0x39 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #58, .LBB46_61 |
| ; CHECK-NEXT: .LBB46_124: // %cond.load229 |
| ; CHECK-NEXT: mov w9, #58 // =0x3a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #59, .LBB46_62 |
| ; CHECK-NEXT: .LBB46_125: // %cond.load233 |
| ; CHECK-NEXT: mov w9, #59 // =0x3b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #60, .LBB46_63 |
| ; CHECK-NEXT: .LBB46_126: // %cond.load237 |
| ; CHECK-NEXT: mov w9, #60 // =0x3c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #61, .LBB46_64 |
| ; CHECK-NEXT: .LBB46_127: // %cond.load241 |
| ; CHECK-NEXT: mov w9, #61 // =0x3d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz x8, #62, .LBB46_65 |
| ; CHECK-NEXT: .LBB46_128: // %cond.load245 |
| ; CHECK-NEXT: mov w9, #62 // =0x3e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbnz x8, #63, .LBB46_66 |
| ; CHECK-NEXT: b .LBB46_67 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v64i16i32: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl64 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.s }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <64 x i16>, ptr %bp |
| %mask = icmp eq <64 x i16> %b, zeroinitializer |
| %load = call <64 x i16> @llvm.masked.expandload.v64i16(ptr %ap, <64 x i1> %mask, <64 x i16> poison) |
| %ext = zext <64 x i16> %load to <64 x i32> |
| store <64 x i32> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_zext_v32i16i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v32i16i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.h, vl32 |
| ; CHECK-NEXT: str x2, [sp] // 8-byte Spill |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0 |
| ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: umov w12, v0.b[1] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w3, v0.b[7] |
| ; CHECK-NEXT: umov w5, v0.b[8] |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[2] |
| ; CHECK-NEXT: umov w4, v0.b[9] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: umov w1, v0.b[10] |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: bfi w6, w12, #1, #1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w16, v0.b[11] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: ubfiz w12, w3, #7, #1 |
| ; CHECK-NEXT: ubfiz w3, w5, #8, #1 |
| ; CHECK-NEXT: umov w17, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: bfi w6, w13, #2, #1 |
| ; CHECK-NEXT: ubfiz w13, w4, #9, #1 |
| ; CHECK-NEXT: umov w18, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: ubfiz w5, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w20, w21, #19, #1 |
| ; CHECK-NEXT: orr w12, w12, w3 |
| ; CHECK-NEXT: ubfiz w1, w1, #10, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: ubfiz w4, w22, #20, #1 |
| ; CHECK-NEXT: orr w12, w12, w13 |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: orr w3, w5, w20 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: orr w11, w12, w1 |
| ; CHECK-NEXT: ubfiz w12, w16, #11, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w13, w3, w4 |
| ; CHECK-NEXT: ubfiz w3, w23, #21, #1 |
| ; CHECK-NEXT: ubfiz w16, w17, #12, #1 |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: ubfiz w17, w24, #22, #1 |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: ubfiz w12, w18, #13, #1 |
| ; CHECK-NEXT: fmov w28, s21 |
| ; CHECK-NEXT: orr w13, w13, w3 |
| ; CHECK-NEXT: ubfiz w18, w25, #23, #1 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: orr w10, w11, w16 |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w29, s22 |
| ; CHECK-NEXT: orr w11, w13, w17 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w26, #24, #1 |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: orr w11, w11, w18 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: ubfiz w9, w14, #14, #1 |
| ; CHECK-NEXT: ubfiz w13, w27, #25, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: orr w11, w11, w12 |
| ; CHECK-NEXT: ubfiz w14, w28, #26, #1 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w30, s23 |
| ; CHECK-NEXT: orr w9, w10, w9 |
| ; CHECK-NEXT: orr w10, w11, w13 |
| ; CHECK-NEXT: ubfiz w11, w29, #27, #1 |
| ; CHECK-NEXT: umov w2, v0.b[6] |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: fmov w8, s24 |
| ; CHECK-NEXT: ubfiz w12, w15, #15, #1 |
| ; CHECK-NEXT: orr w10, w10, w14 |
| ; CHECK-NEXT: ubfiz w14, w30, #28, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: fmov w11, s2 |
| ; CHECK-NEXT: orr w9, w9, w12 |
| ; CHECK-NEXT: ubfiz w12, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w13, w19, #17, #1 |
| ; CHECK-NEXT: ubfiz w8, w8, #29, #1 |
| ; CHECK-NEXT: bfi w6, w2, #6, #1 |
| ; CHECK-NEXT: orr w10, w10, w14 |
| ; CHECK-NEXT: orr w9, w9, w12 |
| ; CHECK-NEXT: ubfiz w11, w11, #30, #1 |
| ; CHECK-NEXT: orr w8, w10, w8 |
| ; CHECK-NEXT: orr w9, w9, w13 |
| ; CHECK-NEXT: orr w9, w6, w9 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB47_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #2 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB47_3 |
| ; CHECK-NEXT: b .LBB47_4 |
| ; CHECK-NEXT: .LBB47_2: |
| ; CHECK-NEXT: adrp x9, .LCPI47_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI47_0 |
| ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB47_4 |
| ; CHECK-NEXT: .LBB47_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: .LBB47_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB47_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB47_37 |
| ; CHECK-NEXT: .LBB47_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB47_38 |
| ; CHECK-NEXT: .LBB47_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB47_39 |
| ; CHECK-NEXT: .LBB47_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB47_40 |
| ; CHECK-NEXT: .LBB47_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB47_41 |
| ; CHECK-NEXT: .LBB47_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB47_42 |
| ; CHECK-NEXT: .LBB47_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB47_43 |
| ; CHECK-NEXT: .LBB47_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB47_44 |
| ; CHECK-NEXT: .LBB47_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB47_45 |
| ; CHECK-NEXT: .LBB47_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB47_46 |
| ; CHECK-NEXT: .LBB47_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB47_47 |
| ; CHECK-NEXT: .LBB47_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB47_48 |
| ; CHECK-NEXT: .LBB47_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB47_49 |
| ; CHECK-NEXT: .LBB47_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB47_50 |
| ; CHECK-NEXT: .LBB47_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB47_51 |
| ; CHECK-NEXT: .LBB47_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB47_52 |
| ; CHECK-NEXT: .LBB47_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB47_53 |
| ; CHECK-NEXT: .LBB47_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB47_54 |
| ; CHECK-NEXT: .LBB47_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB47_55 |
| ; CHECK-NEXT: .LBB47_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB47_56 |
| ; CHECK-NEXT: .LBB47_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB47_57 |
| ; CHECK-NEXT: .LBB47_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB47_58 |
| ; CHECK-NEXT: .LBB47_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB47_59 |
| ; CHECK-NEXT: .LBB47_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB47_60 |
| ; CHECK-NEXT: .LBB47_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB47_61 |
| ; CHECK-NEXT: .LBB47_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB47_62 |
| ; CHECK-NEXT: .LBB47_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB47_63 |
| ; CHECK-NEXT: .LBB47_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB47_64 |
| ; CHECK-NEXT: .LBB47_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB47_35 |
| ; CHECK-NEXT: .LBB47_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w8 |
| ; CHECK-NEXT: ldrh w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w8 |
| ; CHECK-NEXT: .LBB47_35: // %else122 |
| ; CHECK-NEXT: uunpklo z0.s, z0.h |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x8] |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB47_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB47_6 |
| ; CHECK-NEXT: .LBB47_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB47_7 |
| ; CHECK-NEXT: .LBB47_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB47_8 |
| ; CHECK-NEXT: .LBB47_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB47_9 |
| ; CHECK-NEXT: .LBB47_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB47_10 |
| ; CHECK-NEXT: .LBB47_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB47_11 |
| ; CHECK-NEXT: .LBB47_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB47_12 |
| ; CHECK-NEXT: .LBB47_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB47_13 |
| ; CHECK-NEXT: .LBB47_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB47_14 |
| ; CHECK-NEXT: .LBB47_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB47_15 |
| ; CHECK-NEXT: .LBB47_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB47_16 |
| ; CHECK-NEXT: .LBB47_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB47_17 |
| ; CHECK-NEXT: .LBB47_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB47_18 |
| ; CHECK-NEXT: .LBB47_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB47_19 |
| ; CHECK-NEXT: .LBB47_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB47_20 |
| ; CHECK-NEXT: .LBB47_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB47_21 |
| ; CHECK-NEXT: .LBB47_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB47_22 |
| ; CHECK-NEXT: .LBB47_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB47_23 |
| ; CHECK-NEXT: .LBB47_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB47_24 |
| ; CHECK-NEXT: .LBB47_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB47_25 |
| ; CHECK-NEXT: .LBB47_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB47_26 |
| ; CHECK-NEXT: .LBB47_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB47_27 |
| ; CHECK-NEXT: .LBB47_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB47_28 |
| ; CHECK-NEXT: .LBB47_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB47_29 |
| ; CHECK-NEXT: .LBB47_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB47_30 |
| ; CHECK-NEXT: .LBB47_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB47_31 |
| ; CHECK-NEXT: .LBB47_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB47_32 |
| ; CHECK-NEXT: .LBB47_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB47_33 |
| ; CHECK-NEXT: .LBB47_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.h, #0, #1 |
| ; CHECK-NEXT: mov z2.h, w9 |
| ; CHECK-NEXT: ldrh w9, [x0], #2 |
| ; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h |
| ; CHECK-NEXT: mov z0.h, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB47_34 |
| ; CHECK-NEXT: b .LBB47_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v32i16i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1h { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <32 x i16>, ptr %bp |
| %mask = icmp eq <32 x i16> %b, zeroinitializer |
| %load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison) |
| %ext = zext <32 x i16> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding masked load of <32 x i32>, zero-extended to <32 x i64>. |
| ; The mask selects the lanes whose element in the vector loaded from %bp is |
| ; zero. Selected lanes are filled from consecutive i32 values starting at %ap |
| ; (the passthru is poison), then the widened vector is stored to %c. |
| ; vscale_range(16,0) sets a minimum vscale of 16 with no upper bound. |
| ; |
| ; Two lowerings are pinned below. The default-prefix checks pack the 32 mask |
| ; bits into w8 and walk a tbz/tbnz chain, one post-incremented scalar ldr plus |
| ; a predicated single-element insert per active lane. The checks for the |
| ; -mattr=sve2p2 RUN line use cntp + whilelo to build a predicate for a |
| ; contiguous ld1w and then spread the loaded elements with expand. |
| ; |
| ; All assertions in this function are autogenerated. Regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define void @masked_load_zext_v32i32i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 { |
| ; CHECK-LABEL: masked_load_zext_v32i32i64: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #112 |
| ; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 112 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w21, -24 |
| ; CHECK-NEXT: .cfi_offset w22, -32 |
| ; CHECK-NEXT: .cfi_offset w23, -40 |
| ; CHECK-NEXT: .cfi_offset w24, -48 |
| ; CHECK-NEXT: .cfi_offset w25, -56 |
| ; CHECK-NEXT: .cfi_offset w26, -64 |
| ; CHECK-NEXT: .cfi_offset w27, -72 |
| ; CHECK-NEXT: .cfi_offset w28, -80 |
| ; CHECK-NEXT: .cfi_offset w30, -88 |
| ; CHECK-NEXT: .cfi_offset w29, -96 |
| ; CHECK-NEXT: ptrue p1.s, vl32 |
| ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, #0 |
| ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; CHECK-NEXT: mov z5.b, z0.b[18] |
| ; CHECK-NEXT: mov z6.b, z0.b[19] |
| ; CHECK-NEXT: umov w13, v0.b[1] |
| ; CHECK-NEXT: umov w3, v0.b[7] |
| ; CHECK-NEXT: umov w4, v0.b[8] |
| ; CHECK-NEXT: mov z7.b, z0.b[20] |
| ; CHECK-NEXT: fmov w6, s0 |
| ; CHECK-NEXT: umov w5, v0.b[9] |
| ; CHECK-NEXT: umov w12, v0.b[2] |
| ; CHECK-NEXT: mov z16.b, z0.b[21] |
| ; CHECK-NEXT: fmov w20, s5 |
| ; CHECK-NEXT: fmov w21, s6 |
| ; CHECK-NEXT: umov w18, v0.b[10] |
| ; CHECK-NEXT: mov z17.b, z0.b[22] |
| ; CHECK-NEXT: fmov w22, s7 |
| ; CHECK-NEXT: and w6, w6, #0x1 |
| ; CHECK-NEXT: umov w11, v0.b[3] |
| ; CHECK-NEXT: umov w1, v0.b[11] |
| ; CHECK-NEXT: bfi w6, w13, #1, #1 |
| ; CHECK-NEXT: ubfiz w13, w3, #7, #1 |
| ; CHECK-NEXT: ubfiz w3, w4, #8, #1 |
| ; CHECK-NEXT: fmov w23, s16 |
| ; CHECK-NEXT: ubfiz w4, w5, #9, #1 |
| ; CHECK-NEXT: ubfiz w5, w20, #18, #1 |
| ; CHECK-NEXT: ubfiz w20, w21, #19, #1 |
| ; CHECK-NEXT: umov w16, v0.b[12] |
| ; CHECK-NEXT: mov z18.b, z0.b[23] |
| ; CHECK-NEXT: fmov w24, s17 |
| ; CHECK-NEXT: bfi w6, w12, #2, #1 |
| ; CHECK-NEXT: orr w12, w13, w3 |
| ; CHECK-NEXT: ubfiz w13, w22, #20, #1 |
| ; CHECK-NEXT: umov w17, v0.b[13] |
| ; CHECK-NEXT: mov z19.b, z0.b[24] |
| ; CHECK-NEXT: orr w3, w5, w20 |
| ; CHECK-NEXT: ubfiz w18, w18, #10, #1 |
| ; CHECK-NEXT: umov w10, v0.b[4] |
| ; CHECK-NEXT: mov z20.b, z0.b[25] |
| ; CHECK-NEXT: orr w12, w12, w4 |
| ; CHECK-NEXT: orr w13, w3, w13 |
| ; CHECK-NEXT: ubfiz w3, w23, #21, #1 |
| ; CHECK-NEXT: umov w14, v0.b[14] |
| ; CHECK-NEXT: mov z21.b, z0.b[26] |
| ; CHECK-NEXT: fmov w25, s18 |
| ; CHECK-NEXT: ubfiz w1, w1, #11, #1 |
| ; CHECK-NEXT: bfi w6, w11, #3, #1 |
| ; CHECK-NEXT: orr w11, w12, w18 |
| ; CHECK-NEXT: ubfiz w12, w24, #22, #1 |
| ; CHECK-NEXT: umov w9, v0.b[5] |
| ; CHECK-NEXT: mov z22.b, z0.b[27] |
| ; CHECK-NEXT: fmov w26, s19 |
| ; CHECK-NEXT: orr w13, w13, w3 |
| ; CHECK-NEXT: ubfiz w16, w16, #12, #1 |
| ; CHECK-NEXT: fmov w27, s20 |
| ; CHECK-NEXT: orr w11, w11, w1 |
| ; CHECK-NEXT: orr w12, w13, w12 |
| ; CHECK-NEXT: ubfiz w13, w17, #13, #1 |
| ; CHECK-NEXT: umov w8, v0.b[6] |
| ; CHECK-NEXT: mov z24.b, z0.b[29] |
| ; CHECK-NEXT: fmov w28, s21 |
| ; CHECK-NEXT: ubfiz w17, w25, #23, #1 |
| ; CHECK-NEXT: bfi w6, w10, #4, #1 |
| ; CHECK-NEXT: orr w10, w11, w16 |
| ; CHECK-NEXT: mov z3.b, z0.b[16] |
| ; CHECK-NEXT: mov z23.b, z0.b[28] |
| ; CHECK-NEXT: fmov w29, s22 |
| ; CHECK-NEXT: ubfiz w11, w26, #24, #1 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: ubfiz w13, w14, #14, #1 |
| ; CHECK-NEXT: umov w15, v0.b[15] |
| ; CHECK-NEXT: mov z4.b, z0.b[17] |
| ; CHECK-NEXT: orr w12, w12, w17 |
| ; CHECK-NEXT: ubfiz w14, w27, #25, #1 |
| ; CHECK-NEXT: bfi w6, w9, #5, #1 |
| ; CHECK-NEXT: mov z2.b, z0.b[30] |
| ; CHECK-NEXT: orr w11, w12, w11 |
| ; CHECK-NEXT: ubfiz w9, w28, #26, #1 |
| ; CHECK-NEXT: orr w10, w10, w13 |
| ; CHECK-NEXT: fmov w13, s24 |
| ; CHECK-NEXT: fmov w7, s3 |
| ; CHECK-NEXT: fmov w30, s23 |
| ; CHECK-NEXT: orr w11, w11, w14 |
| ; CHECK-NEXT: bfi w6, w8, #6, #1 |
| ; CHECK-NEXT: ubfiz w8, w29, #27, #1 |
| ; CHECK-NEXT: fmov w19, s4 |
| ; CHECK-NEXT: orr w9, w11, w9 |
| ; CHECK-NEXT: ubfiz w12, w15, #15, #1 |
| ; CHECK-NEXT: mov z1.b, z0.b[31] |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: ubfiz w9, w13, #29, #1 |
| ; CHECK-NEXT: fmov w13, s2 |
| ; CHECK-NEXT: ubfiz w11, w7, #16, #1 |
| ; CHECK-NEXT: ubfiz w14, w30, #28, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: ubfiz w12, w19, #17, #1 |
| ; CHECK-NEXT: orr w10, w10, w11 |
| ; CHECK-NEXT: orr w8, w8, w14 |
| ; CHECK-NEXT: ubfiz w11, w13, #30, #1 |
| ; CHECK-NEXT: orr w10, w10, w12 |
| ; CHECK-NEXT: orr w8, w8, w9 |
| ; CHECK-NEXT: orr w9, w6, w10 |
| ; CHECK-NEXT: orr w8, w8, w11 |
| ; CHECK-NEXT: orr w8, w9, w8 |
| ; CHECK-NEXT: fmov w9, s1 |
| ; CHECK-NEXT: orr w8, w8, w9, lsl #31 |
| ; CHECK-NEXT: tbz w8, #0, .LBB48_2 |
| ; CHECK-NEXT: // %bb.1: // %cond.load |
| ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; CHECK-NEXT: add x0, x0, #4 |
| ; CHECK-NEXT: tbnz w8, #1, .LBB48_3 |
| ; CHECK-NEXT: b .LBB48_4 |
| ; CHECK-NEXT: .LBB48_2: |
| ; CHECK-NEXT: adrp x9, .LCPI48_0 |
| ; CHECK-NEXT: add x9, x9, :lo12:.LCPI48_0 |
| ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; CHECK-NEXT: tbz w8, #1, .LBB48_4 |
| ; CHECK-NEXT: .LBB48_3: // %cond.load1 |
| ; CHECK-NEXT: mov w9, #1 // =0x1 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: .LBB48_4: // %else2 |
| ; CHECK-NEXT: tbnz w8, #2, .LBB48_36 |
| ; CHECK-NEXT: // %bb.5: // %else6 |
| ; CHECK-NEXT: tbnz w8, #3, .LBB48_37 |
| ; CHECK-NEXT: .LBB48_6: // %else10 |
| ; CHECK-NEXT: tbnz w8, #4, .LBB48_38 |
| ; CHECK-NEXT: .LBB48_7: // %else14 |
| ; CHECK-NEXT: tbnz w8, #5, .LBB48_39 |
| ; CHECK-NEXT: .LBB48_8: // %else18 |
| ; CHECK-NEXT: tbnz w8, #6, .LBB48_40 |
| ; CHECK-NEXT: .LBB48_9: // %else22 |
| ; CHECK-NEXT: tbnz w8, #7, .LBB48_41 |
| ; CHECK-NEXT: .LBB48_10: // %else26 |
| ; CHECK-NEXT: tbnz w8, #8, .LBB48_42 |
| ; CHECK-NEXT: .LBB48_11: // %else30 |
| ; CHECK-NEXT: tbnz w8, #9, .LBB48_43 |
| ; CHECK-NEXT: .LBB48_12: // %else34 |
| ; CHECK-NEXT: tbnz w8, #10, .LBB48_44 |
| ; CHECK-NEXT: .LBB48_13: // %else38 |
| ; CHECK-NEXT: tbnz w8, #11, .LBB48_45 |
| ; CHECK-NEXT: .LBB48_14: // %else42 |
| ; CHECK-NEXT: tbnz w8, #12, .LBB48_46 |
| ; CHECK-NEXT: .LBB48_15: // %else46 |
| ; CHECK-NEXT: tbnz w8, #13, .LBB48_47 |
| ; CHECK-NEXT: .LBB48_16: // %else50 |
| ; CHECK-NEXT: tbnz w8, #14, .LBB48_48 |
| ; CHECK-NEXT: .LBB48_17: // %else54 |
| ; CHECK-NEXT: tbnz w8, #15, .LBB48_49 |
| ; CHECK-NEXT: .LBB48_18: // %else58 |
| ; CHECK-NEXT: tbnz w8, #16, .LBB48_50 |
| ; CHECK-NEXT: .LBB48_19: // %else62 |
| ; CHECK-NEXT: tbnz w8, #17, .LBB48_51 |
| ; CHECK-NEXT: .LBB48_20: // %else66 |
| ; CHECK-NEXT: tbnz w8, #18, .LBB48_52 |
| ; CHECK-NEXT: .LBB48_21: // %else70 |
| ; CHECK-NEXT: tbnz w8, #19, .LBB48_53 |
| ; CHECK-NEXT: .LBB48_22: // %else74 |
| ; CHECK-NEXT: tbnz w8, #20, .LBB48_54 |
| ; CHECK-NEXT: .LBB48_23: // %else78 |
| ; CHECK-NEXT: tbnz w8, #21, .LBB48_55 |
| ; CHECK-NEXT: .LBB48_24: // %else82 |
| ; CHECK-NEXT: tbnz w8, #22, .LBB48_56 |
| ; CHECK-NEXT: .LBB48_25: // %else86 |
| ; CHECK-NEXT: tbnz w8, #23, .LBB48_57 |
| ; CHECK-NEXT: .LBB48_26: // %else90 |
| ; CHECK-NEXT: tbnz w8, #24, .LBB48_58 |
| ; CHECK-NEXT: .LBB48_27: // %else94 |
| ; CHECK-NEXT: tbnz w8, #25, .LBB48_59 |
| ; CHECK-NEXT: .LBB48_28: // %else98 |
| ; CHECK-NEXT: tbnz w8, #26, .LBB48_60 |
| ; CHECK-NEXT: .LBB48_29: // %else102 |
| ; CHECK-NEXT: tbnz w8, #27, .LBB48_61 |
| ; CHECK-NEXT: .LBB48_30: // %else106 |
| ; CHECK-NEXT: tbnz w8, #28, .LBB48_62 |
| ; CHECK-NEXT: .LBB48_31: // %else110 |
| ; CHECK-NEXT: tbnz w8, #29, .LBB48_63 |
| ; CHECK-NEXT: .LBB48_32: // %else114 |
| ; CHECK-NEXT: tbnz w8, #30, .LBB48_64 |
| ; CHECK-NEXT: .LBB48_33: // %else118 |
| ; CHECK-NEXT: tbz w8, #31, .LBB48_35 |
| ; CHECK-NEXT: .LBB48_34: // %cond.load121 |
| ; CHECK-NEXT: mov w8, #31 // =0x1f |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w8 |
| ; CHECK-NEXT: ldr w8, [x0] |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w8 |
| ; CHECK-NEXT: .LBB48_35: // %else122 |
| ; CHECK-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-NEXT: ptrue p0.d, vl32 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #112 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB48_36: // %cond.load5 |
| ; CHECK-NEXT: mov w9, #2 // =0x2 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #3, .LBB48_6 |
| ; CHECK-NEXT: .LBB48_37: // %cond.load9 |
| ; CHECK-NEXT: mov w9, #3 // =0x3 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #4, .LBB48_7 |
| ; CHECK-NEXT: .LBB48_38: // %cond.load13 |
| ; CHECK-NEXT: mov w9, #4 // =0x4 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #5, .LBB48_8 |
| ; CHECK-NEXT: .LBB48_39: // %cond.load17 |
| ; CHECK-NEXT: mov w9, #5 // =0x5 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #6, .LBB48_9 |
| ; CHECK-NEXT: .LBB48_40: // %cond.load21 |
| ; CHECK-NEXT: mov w9, #6 // =0x6 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #7, .LBB48_10 |
| ; CHECK-NEXT: .LBB48_41: // %cond.load25 |
| ; CHECK-NEXT: mov w9, #7 // =0x7 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #8, .LBB48_11 |
| ; CHECK-NEXT: .LBB48_42: // %cond.load29 |
| ; CHECK-NEXT: mov w9, #8 // =0x8 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #9, .LBB48_12 |
| ; CHECK-NEXT: .LBB48_43: // %cond.load33 |
| ; CHECK-NEXT: mov w9, #9 // =0x9 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #10, .LBB48_13 |
| ; CHECK-NEXT: .LBB48_44: // %cond.load37 |
| ; CHECK-NEXT: mov w9, #10 // =0xa |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #11, .LBB48_14 |
| ; CHECK-NEXT: .LBB48_45: // %cond.load41 |
| ; CHECK-NEXT: mov w9, #11 // =0xb |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #12, .LBB48_15 |
| ; CHECK-NEXT: .LBB48_46: // %cond.load45 |
| ; CHECK-NEXT: mov w9, #12 // =0xc |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #13, .LBB48_16 |
| ; CHECK-NEXT: .LBB48_47: // %cond.load49 |
| ; CHECK-NEXT: mov w9, #13 // =0xd |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #14, .LBB48_17 |
| ; CHECK-NEXT: .LBB48_48: // %cond.load53 |
| ; CHECK-NEXT: mov w9, #14 // =0xe |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #15, .LBB48_18 |
| ; CHECK-NEXT: .LBB48_49: // %cond.load57 |
| ; CHECK-NEXT: mov w9, #15 // =0xf |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #16, .LBB48_19 |
| ; CHECK-NEXT: .LBB48_50: // %cond.load61 |
| ; CHECK-NEXT: mov w9, #16 // =0x10 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #17, .LBB48_20 |
| ; CHECK-NEXT: .LBB48_51: // %cond.load65 |
| ; CHECK-NEXT: mov w9, #17 // =0x11 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #18, .LBB48_21 |
| ; CHECK-NEXT: .LBB48_52: // %cond.load69 |
| ; CHECK-NEXT: mov w9, #18 // =0x12 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #19, .LBB48_22 |
| ; CHECK-NEXT: .LBB48_53: // %cond.load73 |
| ; CHECK-NEXT: mov w9, #19 // =0x13 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #20, .LBB48_23 |
| ; CHECK-NEXT: .LBB48_54: // %cond.load77 |
| ; CHECK-NEXT: mov w9, #20 // =0x14 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #21, .LBB48_24 |
| ; CHECK-NEXT: .LBB48_55: // %cond.load81 |
| ; CHECK-NEXT: mov w9, #21 // =0x15 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #22, .LBB48_25 |
| ; CHECK-NEXT: .LBB48_56: // %cond.load85 |
| ; CHECK-NEXT: mov w9, #22 // =0x16 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #23, .LBB48_26 |
| ; CHECK-NEXT: .LBB48_57: // %cond.load89 |
| ; CHECK-NEXT: mov w9, #23 // =0x17 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #24, .LBB48_27 |
| ; CHECK-NEXT: .LBB48_58: // %cond.load93 |
| ; CHECK-NEXT: mov w9, #24 // =0x18 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #25, .LBB48_28 |
| ; CHECK-NEXT: .LBB48_59: // %cond.load97 |
| ; CHECK-NEXT: mov w9, #25 // =0x19 |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #26, .LBB48_29 |
| ; CHECK-NEXT: .LBB48_60: // %cond.load101 |
| ; CHECK-NEXT: mov w9, #26 // =0x1a |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #27, .LBB48_30 |
| ; CHECK-NEXT: .LBB48_61: // %cond.load105 |
| ; CHECK-NEXT: mov w9, #27 // =0x1b |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #28, .LBB48_31 |
| ; CHECK-NEXT: .LBB48_62: // %cond.load109 |
| ; CHECK-NEXT: mov w9, #28 // =0x1c |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #29, .LBB48_32 |
| ; CHECK-NEXT: .LBB48_63: // %cond.load113 |
| ; CHECK-NEXT: mov w9, #29 // =0x1d |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbz w8, #30, .LBB48_33 |
| ; CHECK-NEXT: .LBB48_64: // %cond.load117 |
| ; CHECK-NEXT: mov w9, #30 // =0x1e |
| ; CHECK-NEXT: index z1.s, #0, #1 |
| ; CHECK-NEXT: mov z2.s, w9 |
| ; CHECK-NEXT: ldr w9, [x0], #4 |
| ; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; CHECK-NEXT: mov z0.s, p1/m, w9 |
| ; CHECK-NEXT: tbnz w8, #31, .LBB48_34 |
| ; CHECK-NEXT: b .LBB48_35 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_v32i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl32 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.d }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d |
| ; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.d }, p2/z, [x0] |
| ; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: ret |
| ; A lane is active when the matching element of the vector at %bp is zero. |
| %b = load <32 x i32>, ptr %bp |
| %mask = icmp eq <32 x i32> %b, zeroinitializer |
| ; Active lanes take consecutive i32 values from %ap, inactive lanes are poison. |
| %load = call <32 x i32> @llvm.masked.expandload.v32i32(ptr %ap, <32 x i1> %mask, <32 x i32> poison) |
| %ext = zext <32 x i32> %load to <32 x i64> |
| store <32 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| define void @masked_load_sext_ugt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_sext_ugt_v8i32i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpne p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB49_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB49_3 |
| ; VBITS_GE_256-NEXT: b .LBB49_4 |
| ; VBITS_GE_256-NEXT: .LBB49_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI49_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI49_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB49_4 |
| ; VBITS_GE_256-NEXT: .LBB49_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB49_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB49_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB49_13 |
| ; VBITS_GE_256-NEXT: .LBB49_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB49_14 |
| ; VBITS_GE_256-NEXT: .LBB49_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB49_15 |
| ; VBITS_GE_256-NEXT: .LBB49_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB49_16 |
| ; VBITS_GE_256-NEXT: .LBB49_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB49_11 |
| ; VBITS_GE_256-NEXT: .LBB49_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB49_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB49_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB49_6 |
| ; VBITS_GE_256-NEXT: .LBB49_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB49_7 |
| ; VBITS_GE_256-NEXT: .LBB49_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB49_8 |
| ; VBITS_GE_256-NEXT: .LBB49_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB49_9 |
| ; VBITS_GE_256-NEXT: .LBB49_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB49_10 |
| ; VBITS_GE_256-NEXT: b .LBB49_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_sext_ugt_v8i32i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpne p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB49_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB49_3 |
| ; VBITS_GE_512-NEXT: b .LBB49_4 |
| ; VBITS_GE_512-NEXT: .LBB49_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI49_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI49_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB49_4 |
| ; VBITS_GE_512-NEXT: .LBB49_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB49_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB49_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB49_13 |
| ; VBITS_GE_512-NEXT: .LBB49_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB49_14 |
| ; VBITS_GE_512-NEXT: .LBB49_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB49_15 |
| ; VBITS_GE_512-NEXT: .LBB49_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB49_16 |
| ; VBITS_GE_512-NEXT: .LBB49_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB49_11 |
| ; VBITS_GE_512-NEXT: .LBB49_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB49_11: // %else26 |
| ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB49_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB49_6 |
| ; VBITS_GE_512-NEXT: .LBB49_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB49_7 |
| ; VBITS_GE_512-NEXT: .LBB49_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB49_8 |
| ; VBITS_GE_512-NEXT: .LBB49_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB49_9 |
| ; VBITS_GE_512-NEXT: .LBB49_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB49_10 |
| ; VBITS_GE_512-NEXT: b .LBB49_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_sext_ugt_v8i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpne p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| %b = load <8 x i32>, ptr %bp |
| %mask = icmp ugt <8 x i32> %b, zeroinitializer |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| %ext = sext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| ; Expanding masked load of <8 x i32> from %ap. The lane mask comes from a signed |
| ; greater-than-zero compare of the vector loaded from %bp; the loaded value is |
| ; zero-extended to <8 x i64> and stored to %c. |
| ; With only +sve the expandload is scalarised into one conditional load per lane |
| ; (tbz/tbnz on a bitmask held in w8); with -mattr=sve2p2 the expected output is a |
| ; cntp + whilelo + ld1w + expand sequence instead. |
| define void @masked_load_zext_sgt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 { |
| ; VBITS_GE_256-LABEL: masked_load_zext_sgt_v8i32i64: |
| ; VBITS_GE_256: // %bb.0: |
| ; VBITS_GE_256-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_256-NEXT: cmpgt p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_256-NEXT: ptrue p0.s |
| ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_256-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_256-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_256-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_256-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_256-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_256-NEXT: tbz w9, #0, .LBB50_2 |
| ; VBITS_GE_256-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_256-NEXT: add x0, x0, #4 |
| ; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB50_3 |
| ; VBITS_GE_256-NEXT: b .LBB50_4 |
| ; VBITS_GE_256-NEXT: .LBB50_2: |
| ; VBITS_GE_256-NEXT: adrp x9, .LCPI50_0 |
| ; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI50_0 |
| ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_256-NEXT: tbz w8, #1, .LBB50_4 |
| ; VBITS_GE_256-NEXT: .LBB50_3: // %cond.load1 |
| ; VBITS_GE_256-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: .LBB50_4: // %else2 |
| ; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB50_12 |
| ; VBITS_GE_256-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB50_13 |
| ; VBITS_GE_256-NEXT: .LBB50_6: // %else10 |
| ; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB50_14 |
| ; VBITS_GE_256-NEXT: .LBB50_7: // %else14 |
| ; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB50_15 |
| ; VBITS_GE_256-NEXT: .LBB50_8: // %else18 |
| ; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB50_16 |
| ; VBITS_GE_256-NEXT: .LBB50_9: // %else22 |
| ; VBITS_GE_256-NEXT: tbz w8, #7, .LBB50_11 |
| ; VBITS_GE_256-NEXT: .LBB50_10: // %cond.load25 |
| ; VBITS_GE_256-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w8 |
| ; VBITS_GE_256-NEXT: ldr w8, [x0] |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_256-NEXT: .LBB50_11: // %else26 |
| ; VBITS_GE_256-NEXT: movprfx z1, z0 |
| ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s |
| ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; VBITS_GE_256-NEXT: add sp, sp, #16 |
| ; VBITS_GE_256-NEXT: ret |
| ; VBITS_GE_256-NEXT: .LBB50_12: // %cond.load5 |
| ; VBITS_GE_256-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #3, .LBB50_6 |
| ; VBITS_GE_256-NEXT: .LBB50_13: // %cond.load9 |
| ; VBITS_GE_256-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #4, .LBB50_7 |
| ; VBITS_GE_256-NEXT: .LBB50_14: // %cond.load13 |
| ; VBITS_GE_256-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #5, .LBB50_8 |
| ; VBITS_GE_256-NEXT: .LBB50_15: // %cond.load17 |
| ; VBITS_GE_256-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbz w8, #6, .LBB50_9 |
| ; VBITS_GE_256-NEXT: .LBB50_16: // %cond.load21 |
| ; VBITS_GE_256-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_256-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_256-NEXT: mov z2.s, w9 |
| ; VBITS_GE_256-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB50_10 |
| ; VBITS_GE_256-NEXT: b .LBB50_11 |
| ; |
| ; VBITS_GE_512-LABEL: masked_load_zext_sgt_v8i32i64: |
| ; VBITS_GE_512: // %bb.0: |
| ; VBITS_GE_512-NEXT: sub sp, sp, #16 |
| ; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16 |
| ; VBITS_GE_512-NEXT: ptrue p1.s, vl8 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1] |
| ; VBITS_GE_512-NEXT: cmpgt p0.s, p1/z, z0.s, #0 |
| ; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff |
| ; VBITS_GE_512-NEXT: ptrue p0.s |
| ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h |
| ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b |
| ; VBITS_GE_512-NEXT: umov w8, v0.b[0] |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[1] |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[2] |
| ; VBITS_GE_512-NEXT: and w8, w8, #0x1 |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[3] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[4] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[5] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1 |
| ; VBITS_GE_512-NEXT: umov w10, v0.b[6] |
| ; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1 |
| ; VBITS_GE_512-NEXT: umov w9, v0.b[7] |
| ; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1 |
| ; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7 |
| ; VBITS_GE_512-NEXT: and w8, w9, #0xff |
| ; VBITS_GE_512-NEXT: tbz w9, #0, .LBB50_2 |
| ; VBITS_GE_512-NEXT: // %bb.1: // %cond.load |
| ; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0] |
| ; VBITS_GE_512-NEXT: add x0, x0, #4 |
| ; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB50_3 |
| ; VBITS_GE_512-NEXT: b .LBB50_4 |
| ; VBITS_GE_512-NEXT: .LBB50_2: |
| ; VBITS_GE_512-NEXT: adrp x9, .LCPI50_0 |
| ; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI50_0 |
| ; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9] |
| ; VBITS_GE_512-NEXT: tbz w8, #1, .LBB50_4 |
| ; VBITS_GE_512-NEXT: .LBB50_3: // %cond.load1 |
| ; VBITS_GE_512-NEXT: mov w9, #1 // =0x1 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: .LBB50_4: // %else2 |
| ; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB50_12 |
| ; VBITS_GE_512-NEXT: // %bb.5: // %else6 |
| ; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB50_13 |
| ; VBITS_GE_512-NEXT: .LBB50_6: // %else10 |
| ; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB50_14 |
| ; VBITS_GE_512-NEXT: .LBB50_7: // %else14 |
| ; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB50_15 |
| ; VBITS_GE_512-NEXT: .LBB50_8: // %else18 |
| ; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB50_16 |
| ; VBITS_GE_512-NEXT: .LBB50_9: // %else22 |
| ; VBITS_GE_512-NEXT: tbz w8, #7, .LBB50_11 |
| ; VBITS_GE_512-NEXT: .LBB50_10: // %cond.load25 |
| ; VBITS_GE_512-NEXT: mov w8, #7 // =0x7 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w8 |
| ; VBITS_GE_512-NEXT: ldr w8, [x0] |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8 |
| ; VBITS_GE_512-NEXT: .LBB50_11: // %else26 |
| ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s |
| ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2] |
| ; VBITS_GE_512-NEXT: add sp, sp, #16 |
| ; VBITS_GE_512-NEXT: ret |
| ; VBITS_GE_512-NEXT: .LBB50_12: // %cond.load5 |
| ; VBITS_GE_512-NEXT: mov w9, #2 // =0x2 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #3, .LBB50_6 |
| ; VBITS_GE_512-NEXT: .LBB50_13: // %cond.load9 |
| ; VBITS_GE_512-NEXT: mov w9, #3 // =0x3 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #4, .LBB50_7 |
| ; VBITS_GE_512-NEXT: .LBB50_14: // %cond.load13 |
| ; VBITS_GE_512-NEXT: mov w9, #4 // =0x4 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #5, .LBB50_8 |
| ; VBITS_GE_512-NEXT: .LBB50_15: // %cond.load17 |
| ; VBITS_GE_512-NEXT: mov w9, #5 // =0x5 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbz w8, #6, .LBB50_9 |
| ; VBITS_GE_512-NEXT: .LBB50_16: // %cond.load21 |
| ; VBITS_GE_512-NEXT: mov w9, #6 // =0x6 |
| ; VBITS_GE_512-NEXT: index z1.s, #0, #1 |
| ; VBITS_GE_512-NEXT: mov z2.s, w9 |
| ; VBITS_GE_512-NEXT: ldr w9, [x0], #4 |
| ; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s |
| ; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9 |
| ; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB50_10 |
| ; VBITS_GE_512-NEXT: b .LBB50_11 |
| ; |
| ; CHECK-EXPAND-LABEL: masked_load_zext_sgt_v8i32i64: |
| ; CHECK-EXPAND: // %bb.0: |
| ; CHECK-EXPAND-NEXT: ptrue p0.s, vl8 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1] |
| ; CHECK-EXPAND-NEXT: cmpgt p1.s, p0/z, z0.s, #0 |
| ; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s |
| ; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8 |
| ; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4 |
| ; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0] |
| ; CHECK-EXPAND-NEXT: ptrue p0.d, vl4 |
| ; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s |
| ; CHECK-EXPAND-NEXT: movprfx z1, z0 |
| ; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16 |
| ; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s |
| ; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s |
| ; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2] |
| ; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3] |
| ; CHECK-EXPAND-NEXT: ret |
| ; Build the lane mask: a lane is active when its element of %b is > 0 (signed). |
| %b = load <8 x i32>, ptr %bp |
| %mask = icmp sgt <8 x i32> %b, zeroinitializer |
| ; Expanding load from %ap under %mask; the passthru operand is poison. |
| %load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison) |
| ; Zero-extend each i32 lane to i64 and store the widened vector to %c. |
| %ext = zext <8 x i32> %load to <8 x i64> |
| store <8 x i64> %ext, ptr %c |
| ret void |
| } |
| |
| ; Attribute group #0: requests the +sve target feature for each function that references #0. |
| attributes #0 = { "target-features"="+sve" } |