; blob: 8789167e611e41fc50b1dab435b377c98f93aa36 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Four llc invocations drive this file. The first three differ only in the
; minimum SVE register width they assume (256, 512 and 2048 bits); the 512-bit
; and 2048-bit invocations are matched against the same assertion prefixes.
; The fourth repeats the 256-bit configuration with the sve2p2 feature added
; and is matched against its own prefix.
; NOTE(review): the first three commands pass no -mattr at all; presumably
; attribute group #0 (defined outside this excerpt) enables SVE - confirm.
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=256 -mattr=sve2p2 < %s | FileCheck %s -check-prefixes=CHECK-EXPAND
target triple = "aarch64-unknown-linux-gnu"
;
; Masked Expanding Loads (llvm.masked.expandload)
;
; Expanding masked load of <2 x half>.
; The mask is the lane-wise ordered-equal compare of the vectors loaded from
; %ap and %bp; llvm.masked.expandload then reads from %ap under that mask with
; an all-zero passthru. vscale_range(2,0) gives a minimum vscale of 2 with no
; upper bound.
; Expected code, default configurations: one branch per mask bit (tbnz/tbz),
; each guarding a single-lane ld1 whose pointer is post-incremented by 2 bytes.
; Expected code, sve2p2 configuration: branch-free; the active-lane count
; (cntp) feeds whilelo, a predicated ld1h reads that many contiguous elements,
; and expand places them in the active lanes.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ldr s1, [x1]
; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT: index z1.s, #1, #1
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: addp v1.2s, v0.2s, v0.2s
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: tbnz w8, #0, .LBB0_3
; CHECK-NEXT: // %bb.1: // %else
; CHECK-NEXT: tbnz w8, #1, .LBB0_4
; CHECK-NEXT: .LBB0_2: // %else2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %cond.load
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ld1 { v0.h }[0], [x0], #2
; CHECK-NEXT: tbz w8, #1, .LBB0_2
; CHECK-NEXT: .LBB0_4: // %cond.load1
; CHECK-NEXT: ld1 { v0.h }[1], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
;
; CHECK-EXPAND-LABEL: masked_load_v2f16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ldr s1, [x0]
; CHECK-EXPAND-NEXT: ldr s2, [x1]
; CHECK-EXPAND-NEXT: movi v0.2d, #0000000000000000
; CHECK-EXPAND-NEXT: ptrue p0.h, vl4
; CHECK-EXPAND-NEXT: fcmeq v1.4h, v1.4h, v2.4h
; CHECK-EXPAND-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-EXPAND-NEXT: mov v0.h[0], v1.h[0]
; CHECK-EXPAND-NEXT: mov w8, v1.s[1]
; CHECK-EXPAND-NEXT: mov v0.h[1], w8
; CHECK-EXPAND-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-EXPAND-NEXT: ret
; Build the mask from two ordinary vector loads.
%a = load <2 x half>, ptr %ap
%b = load <2 x half>, ptr %bp
%mask = fcmp oeq <2 x half> %a, %b
; The expanding load reads from %ap, the same address %a was loaded from.
%load = call <2 x half> @llvm.masked.expandload.v2f16(ptr %ap, <2 x i1> %mask, <2 x half> zeroinitializer)
ret <2 x half> %load
}
; Expanding masked load of <2 x float>.
; Same shape as the <2 x half> test: the mask is the lane-wise ordered-equal
; compare of the vectors at %ap and %bp, the expanding load reads from %ap and
; the passthru is all zeros. vscale_range(2,0) gives a minimum vscale of 2 with
; no upper bound.
; Expected code, default configurations: one tbnz/tbz per mask bit guarding a
; single-lane ld1 with a 4-byte post-increment.
; Expected code, sve2p2 configuration: cntp + whilelo + predicated ld1w +
; expand, with no branches.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d2, [x1]
; CHECK-NEXT: index z0.s, #1, #1
; CHECK-NEXT: fcmeq v1.2s, v1.2s, v2.2s
; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
; CHECK-NEXT: addp v1.2s, v0.2s, v0.2s
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: tbnz w8, #0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %else
; CHECK-NEXT: tbnz w8, #1, .LBB1_4
; CHECK-NEXT: .LBB1_2: // %else2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_3: // %cond.load
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ld1 { v0.s }[0], [x0], #4
; CHECK-NEXT: tbz w8, #1, .LBB1_2
; CHECK-NEXT: .LBB1_4: // %cond.load1
; CHECK-NEXT: ld1 { v0.s }[1], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
;
; CHECK-EXPAND-LABEL: masked_load_v2f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ldr d0, [x0]
; CHECK-EXPAND-NEXT: ldr d1, [x1]
; CHECK-EXPAND-NEXT: ptrue p0.s, vl2
; CHECK-EXPAND-NEXT: fcmeq v0.2s, v0.2s, v1.2s
; CHECK-EXPAND-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-EXPAND-NEXT: ret
; Build the mask from two ordinary vector loads.
%a = load <2 x float>, ptr %ap
%b = load <2 x float>, ptr %bp
%mask = fcmp oeq <2 x float> %a, %b
; The expanding load reads from %ap, the same address %a was loaded from.
%load = call <2 x float> @llvm.masked.expandload.v2f32(ptr %ap, <2 x i1> %mask, <2 x float> zeroinitializer)
ret <2 x float> %load
}
; Expanding masked load of <4 x float>.
; The mask is the lane-wise ordered-equal compare of the vectors at %ap and
; %bp; the expanding load reads from %ap with an all-zero passthru.
; Unlike its neighbours this function uses vscale_range(1,0), i.e. a minimum
; vscale of 1.
; Both sets of assertions below describe the same per-lane sequence (four mask
; bits tested with tbnz/tbz, each guarding a single-lane ld1 with a 4-byte
; post-increment); no expand instruction is expected even with sve2p2.
; NOTE(review): presumably this is because a minimum vscale of 1 only
; guarantees 128-bit SVE registers, so this 128-bit vector is not lowered
; through SVE - confirm against the AArch64 lowering code.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
; CHECK-LABEL: masked_load_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: addv s1, v0.4s
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: tbnz w8, #0, .LBB2_5
; CHECK-NEXT: // %bb.1: // %else
; CHECK-NEXT: tbnz w8, #1, .LBB2_6
; CHECK-NEXT: .LBB2_2: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB2_7
; CHECK-NEXT: .LBB2_3: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB2_8
; CHECK-NEXT: .LBB2_4: // %else10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_5: // %cond.load
; CHECK-NEXT: ld1 { v0.s }[0], [x0], #4
; CHECK-NEXT: tbz w8, #1, .LBB2_2
; CHECK-NEXT: .LBB2_6: // %cond.load1
; CHECK-NEXT: ld1 { v0.s }[1], [x0], #4
; CHECK-NEXT: tbz w8, #2, .LBB2_3
; CHECK-NEXT: .LBB2_7: // %cond.load5
; CHECK-NEXT: ld1 { v0.s }[2], [x0], #4
; CHECK-NEXT: tbz w8, #3, .LBB2_4
; CHECK-NEXT: .LBB2_8: // %cond.load9
; CHECK-NEXT: ld1 { v0.s }[3], [x0]
; CHECK-NEXT: ret
;
; CHECK-EXPAND-LABEL: masked_load_v4f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ldr q0, [x0]
; CHECK-EXPAND-NEXT: ldr q1, [x1]
; CHECK-EXPAND-NEXT: adrp x8, .LCPI2_0
; CHECK-EXPAND-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-EXPAND-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-EXPAND-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-EXPAND-NEXT: addv s1, v0.4s
; CHECK-EXPAND-NEXT: movi v0.2d, #0000000000000000
; CHECK-EXPAND-NEXT: fmov w8, s1
; CHECK-EXPAND-NEXT: tbnz w8, #0, .LBB2_5
; CHECK-EXPAND-NEXT: // %bb.1: // %else
; CHECK-EXPAND-NEXT: tbnz w8, #1, .LBB2_6
; CHECK-EXPAND-NEXT: .LBB2_2: // %else2
; CHECK-EXPAND-NEXT: tbnz w8, #2, .LBB2_7
; CHECK-EXPAND-NEXT: .LBB2_3: // %else6
; CHECK-EXPAND-NEXT: tbnz w8, #3, .LBB2_8
; CHECK-EXPAND-NEXT: .LBB2_4: // %else10
; CHECK-EXPAND-NEXT: ret
; CHECK-EXPAND-NEXT: .LBB2_5: // %cond.load
; CHECK-EXPAND-NEXT: ld1 { v0.s }[0], [x0], #4
; CHECK-EXPAND-NEXT: tbz w8, #1, .LBB2_2
; CHECK-EXPAND-NEXT: .LBB2_6: // %cond.load1
; CHECK-EXPAND-NEXT: ld1 { v0.s }[1], [x0], #4
; CHECK-EXPAND-NEXT: tbz w8, #2, .LBB2_3
; CHECK-EXPAND-NEXT: .LBB2_7: // %cond.load5
; CHECK-EXPAND-NEXT: ld1 { v0.s }[2], [x0], #4
; CHECK-EXPAND-NEXT: tbz w8, #3, .LBB2_4
; CHECK-EXPAND-NEXT: .LBB2_8: // %cond.load9
; CHECK-EXPAND-NEXT: ld1 { v0.s }[3], [x0]
; CHECK-EXPAND-NEXT: ret
; Build the mask from two ordinary vector loads.
%a = load <4 x float>, ptr %ap
%b = load <4 x float>, ptr %bp
%mask = fcmp oeq <4 x float> %a, %b
; The expanding load reads from %ap, the same address %a was loaded from.
%load = call <4 x float> @llvm.masked.expandload.v4f32(ptr %ap, <4 x i1> %mask, <4 x float> zeroinitializer)
ret <4 x float> %load
}
; Expanding masked load of <8 x float>, with the result stored to %c instead
; of being returned.
; The mask is the lane-wise ordered-equal compare of the vectors at %ap and
; %bp; the expanding load reads from %ap with an all-zero passthru.
; vscale_range(2,0) gives a minimum vscale of 2 with no upper bound.
; Expected code, default configurations: the eight mask bits are packed into
; w8 and tested one by one; each taken branch loads one float with a 4-byte
; post-increment and inserts it into z0 through an index/cmpeq-built
; predicate. The first-lane path builds the initial vector through a stack
; slot, and the prologue realigns sp to 32 bytes.
; Expected code, sve2p2 configuration: no stack frame and no branches; cntp
; and whilelo form the load predicate, followed by ld1w, expand and st1w.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define void @masked_load_v8f32(ptr %ap, ptr %bp, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_load_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #48
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: umov w9, v0.b[1]
; CHECK-NEXT: umov w10, v0.b[2]
; CHECK-NEXT: and w8, w8, #0x1
; CHECK-NEXT: bfi w8, w9, #1, #1
; CHECK-NEXT: umov w9, v0.b[3]
; CHECK-NEXT: bfi w8, w10, #2, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: bfi w8, w9, #3, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: bfi w8, w10, #4, #1
; CHECK-NEXT: umov w10, v0.b[6]
; CHECK-NEXT: bfi w8, w9, #5, #1
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: bfi w8, w10, #6, #1
; CHECK-NEXT: orr w9, w8, w9, lsl #7
; CHECK-NEXT: and w8, w9, #0xff
; CHECK-NEXT: tbz w9, #0, .LBB3_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr s0, [x0], #4
; CHECK-NEXT: stp xzr, xzr, [sp, #16]
; CHECK-NEXT: str xzr, [sp, #8]
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str wzr, [sp, #4]
; CHECK-NEXT: str s0, [sp]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
; CHECK-NEXT: tbnz w8, #1, .LBB3_3
; CHECK-NEXT: b .LBB3_4
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: tbz w8, #1, .LBB3_4
; CHECK-NEXT: .LBB3_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB3_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB3_12
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB3_13
; CHECK-NEXT: .LBB3_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB3_14
; CHECK-NEXT: .LBB3_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB3_15
; CHECK-NEXT: .LBB3_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB3_16
; CHECK-NEXT: .LBB3_9: // %else22
; CHECK-NEXT: tbz w8, #7, .LBB3_11
; CHECK-NEXT: .LBB3_10: // %cond.load25
; CHECK-NEXT: mov w8, #7 // =0x7
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB3_11: // %else26
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_12: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #3, .LBB3_6
; CHECK-NEXT: .LBB3_13: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #4, .LBB3_7
; CHECK-NEXT: .LBB3_14: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #5, .LBB3_8
; CHECK-NEXT: .LBB3_15: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #6, .LBB3_9
; CHECK-NEXT: .LBB3_16: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbnz w8, #7, .LBB3_10
; CHECK-NEXT: b .LBB3_11
;
; CHECK-EXPAND-LABEL: masked_load_v8f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
; Build the mask from two ordinary vector loads.
%a = load <8 x float>, ptr %ap
%b = load <8 x float>, ptr %bp
%mask = fcmp oeq <8 x float> %a, %b
; The expanding load reads from %ap, the same address %a was loaded from.
%load = call <8 x float> @llvm.masked.expandload.v8f32(ptr %ap, <8 x i1> %mask, <8 x float> zeroinitializer)
; The vector is written to memory rather than returned.
store <8 x float> %load, ptr %c
ret void
}
; Expanding masked load of <16 x float>, with the result stored to %c.
; The mask is the lane-wise ordered-equal compare of the vectors at %ap and
; %bp; the expanding load reads from %ap with an all-zero passthru.
; This function carries no vscale_range attribute, and its expected code is
; split by configuration instead of sharing the common prefix:
;   * 256-bit minimum: the vector is handled as two 8-lane halves (z0 and z1).
;     A 16-bit mask in w8 is tested bit by bit; bits 0-7 insert into z0 and
;     bits 8-15 into z1, and the halves are stored at %c and %c + 8 elements.
;   * 512-bit and 2048-bit minimum: a single register (z0, ptrue vl16) holds
;     all 16 lanes and the per-bit insert indices run from 1 to 15.
;   * sve2p2 (256-bit minimum): two expand instructions, one per half. The
;     second half's ld1w is indexed by a value produced by cnt over the low
;     eight mask bits, i.e. it appears to start after the elements consumed by
;     the first half.
; NOTE(review): the sve2p2 expectation adjusts sp by 16 bytes on entry and
; exit, yet no asserted instruction accesses the stack - possibly a leftover
; in the lowering rather than anything this test needs.
; NOTE(review): attribute group #0 is defined outside this excerpt.
define void @masked_load_v16f32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; VBITS_GE_256-NEXT: sub x9, sp, #48
; VBITS_GE_256-NEXT: mov x29, sp
; VBITS_GE_256-NEXT: and sp, x9, #0xffffffffffffffe0
; VBITS_GE_256-NEXT: .cfi_def_cfa w29, 16
; VBITS_GE_256-NEXT: .cfi_offset w30, -8
; VBITS_GE_256-NEXT: .cfi_offset w29, -16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI4_0
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w9, s0
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB4_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldr s0, [x0], #4
; VBITS_GE_256-NEXT: stp xzr, xzr, [sp, #16]
; VBITS_GE_256-NEXT: str xzr, [sp, #8]
; VBITS_GE_256-NEXT: mov x9, sp
; VBITS_GE_256-NEXT: movi v1.2d, #0000000000000000
; VBITS_GE_256-NEXT: str wzr, [sp, #4]
; VBITS_GE_256-NEXT: str s0, [sp]
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x9]
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB4_3
; VBITS_GE_256-NEXT: b .LBB4_4
; VBITS_GE_256-NEXT: .LBB4_2:
; VBITS_GE_256-NEXT: movi v0.2d, #0000000000000000
; VBITS_GE_256-NEXT: movi v1.2d, #0000000000000000
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB4_4
; VBITS_GE_256-NEXT: .LBB4_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: .LBB4_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB4_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB4_21
; VBITS_GE_256-NEXT: .LBB4_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB4_22
; VBITS_GE_256-NEXT: .LBB4_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB4_23
; VBITS_GE_256-NEXT: .LBB4_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB4_24
; VBITS_GE_256-NEXT: .LBB4_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB4_25
; VBITS_GE_256-NEXT: .LBB4_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB4_26
; VBITS_GE_256-NEXT: .LBB4_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB4_27
; VBITS_GE_256-NEXT: .LBB4_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB4_28
; VBITS_GE_256-NEXT: .LBB4_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB4_29
; VBITS_GE_256-NEXT: .LBB4_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB4_30
; VBITS_GE_256-NEXT: .LBB4_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB4_31
; VBITS_GE_256-NEXT: .LBB4_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB4_32
; VBITS_GE_256-NEXT: .LBB4_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB4_19
; VBITS_GE_256-NEXT: .LBB4_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w8
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0]
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: .LBB4_19: // %else58
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: mov sp, x29
; VBITS_GE_256-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB4_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB4_6
; VBITS_GE_256-NEXT: .LBB4_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB4_7
; VBITS_GE_256-NEXT: .LBB4_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB4_8
; VBITS_GE_256-NEXT: .LBB4_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB4_9
; VBITS_GE_256-NEXT: .LBB4_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB4_10
; VBITS_GE_256-NEXT: .LBB4_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z0.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB4_11
; VBITS_GE_256-NEXT: .LBB4_26: // %cond.load29
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: ptrue p1.s, vl1
; VBITS_GE_256-NEXT: mov z1.s, p1/m, z2.s
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB4_12
; VBITS_GE_256-NEXT: .LBB4_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB4_13
; VBITS_GE_256-NEXT: .LBB4_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB4_14
; VBITS_GE_256-NEXT: .LBB4_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB4_15
; VBITS_GE_256-NEXT: .LBB4_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB4_16
; VBITS_GE_256-NEXT: .LBB4_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB4_17
; VBITS_GE_256-NEXT: .LBB4_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: ldr s2, [x0], #4
; VBITS_GE_256-NEXT: mov z1.s, p2/m, s2
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB4_18
; VBITS_GE_256-NEXT: b .LBB4_19
;
; VBITS_GE_512-LABEL: masked_load_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; VBITS_GE_512-NEXT: sub x9, sp, #112
; VBITS_GE_512-NEXT: mov x29, sp
; VBITS_GE_512-NEXT: and sp, x9, #0xffffffffffffffc0
; VBITS_GE_512-NEXT: .cfi_def_cfa w29, 16
; VBITS_GE_512-NEXT: .cfi_offset w30, -8
; VBITS_GE_512-NEXT: .cfi_offset w29, -16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB4_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldr s0, [x0], #4
; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #48]
; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #32]
; VBITS_GE_512-NEXT: mov x9, sp
; VBITS_GE_512-NEXT: stp xzr, xzr, [sp, #16]
; VBITS_GE_512-NEXT: str xzr, [sp, #8]
; VBITS_GE_512-NEXT: str wzr, [sp, #4]
; VBITS_GE_512-NEXT: str s0, [sp]
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x9]
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB4_3
; VBITS_GE_512-NEXT: b .LBB4_4
; VBITS_GE_512-NEXT: .LBB4_2:
; VBITS_GE_512-NEXT: movi v0.2d, #0000000000000000
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB4_4
; VBITS_GE_512-NEXT: .LBB4_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: .LBB4_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB4_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB4_21
; VBITS_GE_512-NEXT: .LBB4_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB4_22
; VBITS_GE_512-NEXT: .LBB4_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB4_23
; VBITS_GE_512-NEXT: .LBB4_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB4_24
; VBITS_GE_512-NEXT: .LBB4_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB4_25
; VBITS_GE_512-NEXT: .LBB4_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB4_26
; VBITS_GE_512-NEXT: .LBB4_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB4_27
; VBITS_GE_512-NEXT: .LBB4_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB4_28
; VBITS_GE_512-NEXT: .LBB4_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB4_29
; VBITS_GE_512-NEXT: .LBB4_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB4_30
; VBITS_GE_512-NEXT: .LBB4_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB4_31
; VBITS_GE_512-NEXT: .LBB4_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB4_32
; VBITS_GE_512-NEXT: .LBB4_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB4_19
; VBITS_GE_512-NEXT: .LBB4_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0]
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: .LBB4_19: // %else58
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: mov sp, x29
; VBITS_GE_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB4_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB4_6
; VBITS_GE_512-NEXT: .LBB4_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB4_7
; VBITS_GE_512-NEXT: .LBB4_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB4_8
; VBITS_GE_512-NEXT: .LBB4_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB4_9
; VBITS_GE_512-NEXT: .LBB4_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB4_10
; VBITS_GE_512-NEXT: .LBB4_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB4_11
; VBITS_GE_512-NEXT: .LBB4_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB4_12
; VBITS_GE_512-NEXT: .LBB4_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB4_13
; VBITS_GE_512-NEXT: .LBB4_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB4_14
; VBITS_GE_512-NEXT: .LBB4_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB4_15
; VBITS_GE_512-NEXT: .LBB4_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB4_16
; VBITS_GE_512-NEXT: .LBB4_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB4_17
; VBITS_GE_512-NEXT: .LBB4_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: ldr s1, [x0], #4
; VBITS_GE_512-NEXT: mov z0.s, p2/m, s1
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB4_18
; VBITS_GE_512-NEXT: b .LBB4_19
;
; CHECK-EXPAND-LABEL: masked_load_v16f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: umov w8, v0.b[0]
; CHECK-EXPAND-NEXT: umov w9, v0.b[1]
; CHECK-EXPAND-NEXT: umov w10, v0.b[2]
; CHECK-EXPAND-NEXT: umov w11, v0.b[3]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[4]
; CHECK-EXPAND-NEXT: bfi w8, w10, #2, #1
; CHECK-EXPAND-NEXT: umov w10, v0.b[5]
; CHECK-EXPAND-NEXT: bfi w8, w11, #3, #1
; CHECK-EXPAND-NEXT: mov x11, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1, x11, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
; CHECK-EXPAND-NEXT: bfi w8, w9, #4, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[6]
; CHECK-EXPAND-NEXT: bfi w8, w10, #5, #1
; CHECK-EXPAND-NEXT: umov w10, v0.b[7]
; CHECK-EXPAND-NEXT: fcmeq p2.s, p0/z, z2.s, z1.s
; CHECK-EXPAND-NEXT: bfi w8, w9, #6, #1
; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #7
; CHECK-EXPAND-NEXT: cntp x10, p1, p1.s
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s
; CHECK-EXPAND-NEXT: and w8, w8, #0xff
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: whilelo p4.s, xzr, x10
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.s, xzr, x9
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p4/z, [x0]
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: expand z1.s, p1, z1.s
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p3/z, [x0, x8, lsl #2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2, x11, lsl #2]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
; Build the mask from two ordinary vector loads.
%a = load <16 x float>, ptr %ap
%b = load <16 x float>, ptr %bp
%mask = fcmp oeq <16 x float> %a, %b
; The expanding load reads from %ap, the same address %a was loaded from.
%load = call <16 x float> @llvm.masked.expandload.v16f32(ptr %ap, <16 x i1> %mask, <16 x float> zeroinitializer)
; The vector is written to memory rather than returned.
store <16 x float> %load, ptr %c
ret void
}
; masked_load_v32f32
; Despite the masked_load_ name, this function exercises
; llvm.masked.expandload on a fixed-length <32 x float> vector. The mask is the
; lane-wise fcmp oeq of the vectors loaded from %ap and %bp, the expanding load
; reads from %ap with a zero passthru, and the result is stored to %c.
; vscale_range(8,0) gives a minimum vscale of 8 with no upper bound, and the
; checks below use a single "ptrue p0.s, vl32" predicate for the whole vector.
;
; Two expected lowerings are recorded.
; - Prefix CHECK (RUN lines without sve2p2) packs the 32 mask lanes into w8 and
;   then tests one bit per lane, branching to a cond.load block that inserts a
;   single scalar load into z0. Every such load except the last post-increments
;   x0 by 4.
; - Prefix CHECK-EXPAND (RUN line with -mattr=sve2p2) is branch-free, using cntp
;   and whilelo to build the load predicate, one ld1w, and the expand
;   instruction.
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script rather than editing them by hand.
define void @masked_load_v32f32(ptr %ap, ptr %bp, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_load_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #288
; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: and sp, x9, #0xffffffffffffff80
; CHECK-NEXT: .cfi_def_cfa w29, 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[1]
; CHECK-NEXT: umov w3, v0.b[7]
; CHECK-NEXT: umov w4, v0.b[8]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w5, v0.b[9]
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: umov w18, v0.b[10]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w1, v0.b[11]
; CHECK-NEXT: bfi w6, w13, #1, #1
; CHECK-NEXT: ubfiz w13, w3, #7, #1
; CHECK-NEXT: ubfiz w3, w4, #8, #1
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: ubfiz w4, w5, #9, #1
; CHECK-NEXT: ubfiz w5, w20, #18, #1
; CHECK-NEXT: ubfiz w20, w21, #19, #1
; CHECK-NEXT: umov w16, v0.b[12]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: bfi w6, w12, #2, #1
; CHECK-NEXT: orr w12, w13, w3
; CHECK-NEXT: ubfiz w13, w22, #20, #1
; CHECK-NEXT: umov w17, v0.b[13]
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: orr w3, w5, w20
; CHECK-NEXT: ubfiz w18, w18, #10, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: orr w12, w12, w4
; CHECK-NEXT: orr w13, w3, w13
; CHECK-NEXT: ubfiz w3, w23, #21, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: ubfiz w1, w1, #11, #1
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: orr w11, w12, w18
; CHECK-NEXT: ubfiz w12, w24, #22, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: orr w13, w13, w3
; CHECK-NEXT: ubfiz w16, w16, #12, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: fmov w28, s21
; CHECK-NEXT: orr w11, w11, w1
; CHECK-NEXT: orr w12, w13, w12
; CHECK-NEXT: ubfiz w13, w17, #13, #1
; CHECK-NEXT: ubfiz w17, w25, #23, #1
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: orr w10, w11, w16
; CHECK-NEXT: ubfiz w11, w26, #24, #1
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: orr w12, w12, w17
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: ubfiz w13, w14, #14, #1
; CHECK-NEXT: ubfiz w14, w27, #25, #1
; CHECK-NEXT: orr w11, w12, w11
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: ubfiz w9, w28, #26, #1
; CHECK-NEXT: str w8, [sp, #124] // 4-byte Spill
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w30, s22
; CHECK-NEXT: orr w11, w11, w14
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: orr w9, w11, w9
; CHECK-NEXT: ldr w11, [sp, #124] // 4-byte Reload
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: fmov w13, s24
; CHECK-NEXT: fmov w8, s23
; CHECK-NEXT: bfi w6, w11, #6, #1
; CHECK-NEXT: ubfiz w11, w30, #27, #1
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: ubfiz w12, w15, #15, #1
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w9, w9, w11
; CHECK-NEXT: ubfiz w11, w13, #29, #1
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: ubfiz w8, w8, #28, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w7, #16, #1
; CHECK-NEXT: ubfiz w14, w19, #17, #1
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: ubfiz w9, w13, #30, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w10, w10, w14
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w10, w6, w10
; CHECK-NEXT: orr w8, w10, w8
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB5_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr s0, [x0], #4
; CHECK-NEXT: stp xzr, xzr, [sp, #240]
; CHECK-NEXT: stp xzr, xzr, [sp, #224]
; CHECK-NEXT: add x9, sp, #128
; CHECK-NEXT: stp xzr, xzr, [sp, #208]
; CHECK-NEXT: stp xzr, xzr, [sp, #192]
; CHECK-NEXT: stp xzr, xzr, [sp, #176]
; CHECK-NEXT: stp xzr, xzr, [sp, #160]
; CHECK-NEXT: stp xzr, xzr, [sp, #144]
; CHECK-NEXT: str xzr, [sp, #136]
; CHECK-NEXT: str wzr, [sp, #132]
; CHECK-NEXT: str s0, [sp, #128]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
; CHECK-NEXT: tbnz w8, #1, .LBB5_3
; CHECK-NEXT: b .LBB5_4
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: tbz w8, #1, .LBB5_4
; CHECK-NEXT: .LBB5_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB5_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB5_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB5_37
; CHECK-NEXT: .LBB5_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB5_38
; CHECK-NEXT: .LBB5_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB5_39
; CHECK-NEXT: .LBB5_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB5_40
; CHECK-NEXT: .LBB5_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB5_41
; CHECK-NEXT: .LBB5_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB5_42
; CHECK-NEXT: .LBB5_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB5_43
; CHECK-NEXT: .LBB5_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB5_44
; CHECK-NEXT: .LBB5_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB5_45
; CHECK-NEXT: .LBB5_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB5_46
; CHECK-NEXT: .LBB5_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB5_47
; CHECK-NEXT: .LBB5_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB5_48
; CHECK-NEXT: .LBB5_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB5_49
; CHECK-NEXT: .LBB5_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB5_50
; CHECK-NEXT: .LBB5_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB5_51
; CHECK-NEXT: .LBB5_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB5_52
; CHECK-NEXT: .LBB5_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB5_53
; CHECK-NEXT: .LBB5_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB5_54
; CHECK-NEXT: .LBB5_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB5_55
; CHECK-NEXT: .LBB5_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB5_56
; CHECK-NEXT: .LBB5_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB5_57
; CHECK-NEXT: .LBB5_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB5_58
; CHECK-NEXT: .LBB5_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB5_59
; CHECK-NEXT: .LBB5_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB5_60
; CHECK-NEXT: .LBB5_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB5_61
; CHECK-NEXT: .LBB5_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB5_62
; CHECK-NEXT: .LBB5_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB5_63
; CHECK-NEXT: .LBB5_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB5_64
; CHECK-NEXT: .LBB5_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB5_35
; CHECK-NEXT: .LBB5_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB5_35: // %else122
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #3, .LBB5_6
; CHECK-NEXT: .LBB5_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #4, .LBB5_7
; CHECK-NEXT: .LBB5_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #5, .LBB5_8
; CHECK-NEXT: .LBB5_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #6, .LBB5_9
; CHECK-NEXT: .LBB5_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #7, .LBB5_10
; CHECK-NEXT: .LBB5_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #8, .LBB5_11
; CHECK-NEXT: .LBB5_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #9, .LBB5_12
; CHECK-NEXT: .LBB5_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #10, .LBB5_13
; CHECK-NEXT: .LBB5_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #11, .LBB5_14
; CHECK-NEXT: .LBB5_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #12, .LBB5_15
; CHECK-NEXT: .LBB5_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #13, .LBB5_16
; CHECK-NEXT: .LBB5_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #14, .LBB5_17
; CHECK-NEXT: .LBB5_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #15, .LBB5_18
; CHECK-NEXT: .LBB5_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #16, .LBB5_19
; CHECK-NEXT: .LBB5_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #17, .LBB5_20
; CHECK-NEXT: .LBB5_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #18, .LBB5_21
; CHECK-NEXT: .LBB5_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #19, .LBB5_22
; CHECK-NEXT: .LBB5_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #20, .LBB5_23
; CHECK-NEXT: .LBB5_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #21, .LBB5_24
; CHECK-NEXT: .LBB5_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #22, .LBB5_25
; CHECK-NEXT: .LBB5_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #23, .LBB5_26
; CHECK-NEXT: .LBB5_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #24, .LBB5_27
; CHECK-NEXT: .LBB5_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #25, .LBB5_28
; CHECK-NEXT: .LBB5_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #26, .LBB5_29
; CHECK-NEXT: .LBB5_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #27, .LBB5_30
; CHECK-NEXT: .LBB5_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #28, .LBB5_31
; CHECK-NEXT: .LBB5_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #29, .LBB5_32
; CHECK-NEXT: .LBB5_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #30, .LBB5_33
; CHECK-NEXT: .LBB5_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbnz w8, #31, .LBB5_34
; CHECK-NEXT: b .LBB5_35
;
; CHECK-EXPAND-LABEL: masked_load_v32f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl32
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
; Build the mask from a lane-wise ordered-equal compare of the two inputs, then
; expand-load from %ap under that mask and store the result to %c.
%a = load <32 x float>, ptr %ap
%b = load <32 x float>, ptr %bp
%mask = fcmp oeq <32 x float> %a, %b
%load = call <32 x float> @llvm.masked.expandload.v32f32(ptr %ap, <32 x i1> %mask, <32 x float> zeroinitializer)
store <32 x float> %load, ptr %c
ret void
}
define void @masked_load_v64f32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #672
; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
; CHECK-NEXT: .cfi_def_cfa w29, 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w12, v0.b[1]
; CHECK-NEXT: mov z3.b, z0.b[18]
; CHECK-NEXT: mov z4.b, z0.b[19]
; CHECK-NEXT: fmov w28, s0
; CHECK-NEXT: umov w17, v0.b[7]
; CHECK-NEXT: umov w4, v0.b[8]
; CHECK-NEXT: mov z5.b, z0.b[20]
; CHECK-NEXT: umov w13, v0.b[2]
; CHECK-NEXT: umov w3, v0.b[9]
; CHECK-NEXT: mov z1.b, z0.b[16]
; CHECK-NEXT: mov z6.b, z0.b[21]
; CHECK-NEXT: fmov w19, s3
; CHECK-NEXT: fmov w20, s4
; CHECK-NEXT: and x28, x28, #0x1
; CHECK-NEXT: umov w18, v0.b[10]
; CHECK-NEXT: mov z2.b, z0.b[17]
; CHECK-NEXT: mov z7.b, z0.b[22]
; CHECK-NEXT: fmov w21, s5
; CHECK-NEXT: bfi x28, x12, #1, #1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w1, v0.b[11]
; CHECK-NEXT: fmov w6, s1
; CHECK-NEXT: mov z1.b, z0.b[23]
; CHECK-NEXT: fmov w22, s6
; CHECK-NEXT: ubfiz x12, x17, #7, #1
; CHECK-NEXT: ubfiz x17, x4, #8, #1
; CHECK-NEXT: ubfiz x4, x19, #18, #1
; CHECK-NEXT: ubfiz x19, x20, #19, #1
; CHECK-NEXT: bfi x28, x13, #2, #1
; CHECK-NEXT: ubfiz x13, x3, #9, #1
; CHECK-NEXT: umov w15, v0.b[12]
; CHECK-NEXT: fmov w7, s2
; CHECK-NEXT: mov z2.b, z0.b[24]
; CHECK-NEXT: fmov w23, s7
; CHECK-NEXT: orr x12, x12, x17
; CHECK-NEXT: ubfiz x3, x21, #20, #1
; CHECK-NEXT: fmov w24, s1
; CHECK-NEXT: orr x17, x4, x19
; CHECK-NEXT: orr x12, x12, x13
; CHECK-NEXT: ubfiz x13, x18, #10, #1
; CHECK-NEXT: ubfiz x18, x22, #21, #1
; CHECK-NEXT: orr x17, x17, x3
; CHECK-NEXT: bfi x28, x11, #3, #1
; CHECK-NEXT: ubfiz x11, x1, #11, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: umov w8, v0.b[5]
; CHECK-NEXT: umov w16, v0.b[13]
; CHECK-NEXT: fmov w25, s2
; CHECK-NEXT: orr x12, x12, x13
; CHECK-NEXT: orr x13, x17, x18
; CHECK-NEXT: ubfiz x17, x23, #22, #1
; CHECK-NEXT: orr x11, x12, x11
; CHECK-NEXT: ubfiz x12, x15, #12, #1
; CHECK-NEXT: ubfiz x15, x24, #23, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: mov z16.b, z0.b[25]
; CHECK-NEXT: orr x13, x13, x17
; CHECK-NEXT: umov w5, v0.b[15]
; CHECK-NEXT: orr x11, x11, x12
; CHECK-NEXT: orr x12, x13, x15
; CHECK-NEXT: ubfiz x13, x25, #24, #1
; CHECK-NEXT: str x8, [sp, #240] // 8-byte Spill
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: mov z4.b, z0.b[26]
; CHECK-NEXT: bfi x28, x10, #4, #1
; CHECK-NEXT: ubfiz x10, x16, #13, #1
; CHECK-NEXT: mov z5.b, z0.b[27]
; CHECK-NEXT: fmov w26, s16
; CHECK-NEXT: orr x12, x12, x13
; CHECK-NEXT: ldr x13, [sp, #240] // 8-byte Reload
; CHECK-NEXT: mov z6.b, z0.b[28]
; CHECK-NEXT: orr x10, x11, x10
; CHECK-NEXT: ubfiz x11, x14, #14, #1
; CHECK-NEXT: mov z7.b, z0.b[29]
; CHECK-NEXT: fmov w27, s4
; CHECK-NEXT: bfi x28, x13, #5, #1
; CHECK-NEXT: ubfiz x13, x5, #15, #1
; CHECK-NEXT: str x8, [sp, #248] // 8-byte Spill
; CHECK-NEXT: mov z17.b, z0.b[30]
; CHECK-NEXT: fmov w30, s5
; CHECK-NEXT: ubfiz x14, x26, #25, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: fmov w8, s6
; CHECK-NEXT: fmov w9, s7
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: ldr x13, [sp, #248] // 8-byte Reload
; CHECK-NEXT: orr x11, x12, x14
; CHECK-NEXT: ubfiz x12, x27, #26, #1
; CHECK-NEXT: fmov w20, s17
; CHECK-NEXT: mov z3.b, z0.b[31]
; CHECK-NEXT: bfi x28, x13, #6, #1
; CHECK-NEXT: ubfiz x13, x30, #27, #1
; CHECK-NEXT: ubfiz x8, x8, #28, #1
; CHECK-NEXT: orr x11, x11, x12
; CHECK-NEXT: ubfiz x12, x6, #16, #1
; CHECK-NEXT: ubfiz x9, x9, #29, #1
; CHECK-NEXT: orr x11, x11, x13
; CHECK-NEXT: ubfiz x13, x7, #17, #1
; CHECK-NEXT: mov z2.b, z0.b[32]
; CHECK-NEXT: orr x10, x10, x12
; CHECK-NEXT: orr x8, x11, x8
; CHECK-NEXT: ubfiz x11, x20, #30, #1
; CHECK-NEXT: fmov w12, s3
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: orr x9, x28, x10
; CHECK-NEXT: orr x8, x8, x11
; CHECK-NEXT: mov z1.b, z0.b[33]
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[34]
; CHECK-NEXT: lsl w10, w12, #31
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[62]
; CHECK-NEXT: mov z0.b, z0.b[63]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB6_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ldr s0, [x0], #4
; CHECK-NEXT: stp xzr, xzr, [sp, #496]
; CHECK-NEXT: stp xzr, xzr, [sp, #480]
; CHECK-NEXT: add x9, sp, #256
; CHECK-NEXT: stp xzr, xzr, [sp, #464]
; CHECK-NEXT: stp xzr, xzr, [sp, #448]
; CHECK-NEXT: stp xzr, xzr, [sp, #432]
; CHECK-NEXT: stp xzr, xzr, [sp, #416]
; CHECK-NEXT: stp xzr, xzr, [sp, #400]
; CHECK-NEXT: stp xzr, xzr, [sp, #384]
; CHECK-NEXT: stp xzr, xzr, [sp, #368]
; CHECK-NEXT: stp xzr, xzr, [sp, #352]
; CHECK-NEXT: stp xzr, xzr, [sp, #336]
; CHECK-NEXT: stp xzr, xzr, [sp, #320]
; CHECK-NEXT: stp xzr, xzr, [sp, #304]
; CHECK-NEXT: stp xzr, xzr, [sp, #288]
; CHECK-NEXT: stp xzr, xzr, [sp, #272]
; CHECK-NEXT: str xzr, [sp, #264]
; CHECK-NEXT: str wzr, [sp, #260]
; CHECK-NEXT: str s0, [sp, #256]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
; CHECK-NEXT: tbnz w8, #1, .LBB6_3
; CHECK-NEXT: b .LBB6_4
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: tbz w8, #1, .LBB6_4
; CHECK-NEXT: .LBB6_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB6_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB6_68
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB6_69
; CHECK-NEXT: .LBB6_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB6_70
; CHECK-NEXT: .LBB6_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB6_71
; CHECK-NEXT: .LBB6_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB6_72
; CHECK-NEXT: .LBB6_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB6_73
; CHECK-NEXT: .LBB6_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB6_74
; CHECK-NEXT: .LBB6_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB6_75
; CHECK-NEXT: .LBB6_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB6_76
; CHECK-NEXT: .LBB6_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB6_77
; CHECK-NEXT: .LBB6_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB6_78
; CHECK-NEXT: .LBB6_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB6_79
; CHECK-NEXT: .LBB6_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB6_80
; CHECK-NEXT: .LBB6_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB6_81
; CHECK-NEXT: .LBB6_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB6_82
; CHECK-NEXT: .LBB6_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB6_83
; CHECK-NEXT: .LBB6_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB6_84
; CHECK-NEXT: .LBB6_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB6_85
; CHECK-NEXT: .LBB6_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB6_86
; CHECK-NEXT: .LBB6_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB6_87
; CHECK-NEXT: .LBB6_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB6_88
; CHECK-NEXT: .LBB6_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB6_89
; CHECK-NEXT: .LBB6_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB6_90
; CHECK-NEXT: .LBB6_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB6_91
; CHECK-NEXT: .LBB6_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB6_92
; CHECK-NEXT: .LBB6_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB6_93
; CHECK-NEXT: .LBB6_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB6_94
; CHECK-NEXT: .LBB6_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB6_95
; CHECK-NEXT: .LBB6_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB6_96
; CHECK-NEXT: .LBB6_33: // %else118
; CHECK-NEXT: tbnz w8, #31, .LBB6_97
; CHECK-NEXT: .LBB6_34: // %else122
; CHECK-NEXT: tbnz x8, #32, .LBB6_98
; CHECK-NEXT: .LBB6_35: // %else126
; CHECK-NEXT: tbnz x8, #33, .LBB6_99
; CHECK-NEXT: .LBB6_36: // %else130
; CHECK-NEXT: tbnz x8, #34, .LBB6_100
; CHECK-NEXT: .LBB6_37: // %else134
; CHECK-NEXT: tbnz x8, #35, .LBB6_101
; CHECK-NEXT: .LBB6_38: // %else138
; CHECK-NEXT: tbnz x8, #36, .LBB6_102
; CHECK-NEXT: .LBB6_39: // %else142
; CHECK-NEXT: tbnz x8, #37, .LBB6_103
; CHECK-NEXT: .LBB6_40: // %else146
; CHECK-NEXT: tbnz x8, #38, .LBB6_104
; CHECK-NEXT: .LBB6_41: // %else150
; CHECK-NEXT: tbnz x8, #39, .LBB6_105
; CHECK-NEXT: .LBB6_42: // %else154
; CHECK-NEXT: tbnz x8, #40, .LBB6_106
; CHECK-NEXT: .LBB6_43: // %else158
; CHECK-NEXT: tbnz x8, #41, .LBB6_107
; CHECK-NEXT: .LBB6_44: // %else162
; CHECK-NEXT: tbnz x8, #42, .LBB6_108
; CHECK-NEXT: .LBB6_45: // %else166
; CHECK-NEXT: tbnz x8, #43, .LBB6_109
; CHECK-NEXT: .LBB6_46: // %else170
; CHECK-NEXT: tbnz x8, #44, .LBB6_110
; CHECK-NEXT: .LBB6_47: // %else174
; CHECK-NEXT: tbnz x8, #45, .LBB6_111
; CHECK-NEXT: .LBB6_48: // %else178
; CHECK-NEXT: tbnz x8, #46, .LBB6_112
; CHECK-NEXT: .LBB6_49: // %else182
; CHECK-NEXT: tbnz x8, #47, .LBB6_113
; CHECK-NEXT: .LBB6_50: // %else186
; CHECK-NEXT: tbnz x8, #48, .LBB6_114
; CHECK-NEXT: .LBB6_51: // %else190
; CHECK-NEXT: tbnz x8, #49, .LBB6_115
; CHECK-NEXT: .LBB6_52: // %else194
; CHECK-NEXT: tbnz x8, #50, .LBB6_116
; CHECK-NEXT: .LBB6_53: // %else198
; CHECK-NEXT: tbnz x8, #51, .LBB6_117
; CHECK-NEXT: .LBB6_54: // %else202
; CHECK-NEXT: tbnz x8, #52, .LBB6_118
; CHECK-NEXT: .LBB6_55: // %else206
; CHECK-NEXT: tbnz x8, #53, .LBB6_119
; CHECK-NEXT: .LBB6_56: // %else210
; CHECK-NEXT: tbnz x8, #54, .LBB6_120
; CHECK-NEXT: .LBB6_57: // %else214
; CHECK-NEXT: tbnz x8, #55, .LBB6_121
; CHECK-NEXT: .LBB6_58: // %else218
; CHECK-NEXT: tbnz x8, #56, .LBB6_122
; CHECK-NEXT: .LBB6_59: // %else222
; CHECK-NEXT: tbnz x8, #57, .LBB6_123
; CHECK-NEXT: .LBB6_60: // %else226
; CHECK-NEXT: tbnz x8, #58, .LBB6_124
; CHECK-NEXT: .LBB6_61: // %else230
; CHECK-NEXT: tbnz x8, #59, .LBB6_125
; CHECK-NEXT: .LBB6_62: // %else234
; CHECK-NEXT: tbnz x8, #60, .LBB6_126
; CHECK-NEXT: .LBB6_63: // %else238
; CHECK-NEXT: tbnz x8, #61, .LBB6_127
; CHECK-NEXT: .LBB6_64: // %else242
; CHECK-NEXT: tbnz x8, #62, .LBB6_128
; CHECK-NEXT: .LBB6_65: // %else246
; CHECK-NEXT: tbz x8, #63, .LBB6_67
; CHECK-NEXT: .LBB6_66: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: .LBB6_67: // %else250
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB6_68: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #3, .LBB6_6
; CHECK-NEXT: .LBB6_69: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #4, .LBB6_7
; CHECK-NEXT: .LBB6_70: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #5, .LBB6_8
; CHECK-NEXT: .LBB6_71: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #6, .LBB6_9
; CHECK-NEXT: .LBB6_72: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #7, .LBB6_10
; CHECK-NEXT: .LBB6_73: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #8, .LBB6_11
; CHECK-NEXT: .LBB6_74: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #9, .LBB6_12
; CHECK-NEXT: .LBB6_75: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #10, .LBB6_13
; CHECK-NEXT: .LBB6_76: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #11, .LBB6_14
; CHECK-NEXT: .LBB6_77: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #12, .LBB6_15
; CHECK-NEXT: .LBB6_78: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #13, .LBB6_16
; CHECK-NEXT: .LBB6_79: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #14, .LBB6_17
; CHECK-NEXT: .LBB6_80: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #15, .LBB6_18
; CHECK-NEXT: .LBB6_81: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #16, .LBB6_19
; CHECK-NEXT: .LBB6_82: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #17, .LBB6_20
; CHECK-NEXT: .LBB6_83: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #18, .LBB6_21
; CHECK-NEXT: .LBB6_84: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #19, .LBB6_22
; CHECK-NEXT: .LBB6_85: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #20, .LBB6_23
; CHECK-NEXT: .LBB6_86: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #21, .LBB6_24
; CHECK-NEXT: .LBB6_87: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #22, .LBB6_25
; CHECK-NEXT: .LBB6_88: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #23, .LBB6_26
; CHECK-NEXT: .LBB6_89: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #24, .LBB6_27
; CHECK-NEXT: .LBB6_90: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #25, .LBB6_28
; CHECK-NEXT: .LBB6_91: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #26, .LBB6_29
; CHECK-NEXT: .LBB6_92: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #27, .LBB6_30
; CHECK-NEXT: .LBB6_93: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #28, .LBB6_31
; CHECK-NEXT: .LBB6_94: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #29, .LBB6_32
; CHECK-NEXT: .LBB6_95: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #30, .LBB6_33
; CHECK-NEXT: .LBB6_96: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz w8, #31, .LBB6_34
; CHECK-NEXT: .LBB6_97: // %cond.load121
; CHECK-NEXT: mov w9, #31 // =0x1f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #32, .LBB6_35
; CHECK-NEXT: .LBB6_98: // %cond.load125
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #33, .LBB6_36
; CHECK-NEXT: .LBB6_99: // %cond.load129
; CHECK-NEXT: mov w9, #33 // =0x21
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #34, .LBB6_37
; CHECK-NEXT: .LBB6_100: // %cond.load133
; CHECK-NEXT: mov w9, #34 // =0x22
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #35, .LBB6_38
; CHECK-NEXT: .LBB6_101: // %cond.load137
; CHECK-NEXT: mov w9, #35 // =0x23
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #36, .LBB6_39
; CHECK-NEXT: .LBB6_102: // %cond.load141
; CHECK-NEXT: mov w9, #36 // =0x24
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #37, .LBB6_40
; CHECK-NEXT: .LBB6_103: // %cond.load145
; CHECK-NEXT: mov w9, #37 // =0x25
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #38, .LBB6_41
; CHECK-NEXT: .LBB6_104: // %cond.load149
; CHECK-NEXT: mov w9, #38 // =0x26
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #39, .LBB6_42
; CHECK-NEXT: .LBB6_105: // %cond.load153
; CHECK-NEXT: mov w9, #39 // =0x27
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #40, .LBB6_43
; CHECK-NEXT: .LBB6_106: // %cond.load157
; CHECK-NEXT: mov w9, #40 // =0x28
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #41, .LBB6_44
; CHECK-NEXT: .LBB6_107: // %cond.load161
; CHECK-NEXT: mov w9, #41 // =0x29
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #42, .LBB6_45
; CHECK-NEXT: .LBB6_108: // %cond.load165
; CHECK-NEXT: mov w9, #42 // =0x2a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #43, .LBB6_46
; CHECK-NEXT: .LBB6_109: // %cond.load169
; CHECK-NEXT: mov w9, #43 // =0x2b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #44, .LBB6_47
; CHECK-NEXT: .LBB6_110: // %cond.load173
; CHECK-NEXT: mov w9, #44 // =0x2c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #45, .LBB6_48
; CHECK-NEXT: .LBB6_111: // %cond.load177
; CHECK-NEXT: mov w9, #45 // =0x2d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #46, .LBB6_49
; CHECK-NEXT: .LBB6_112: // %cond.load181
; CHECK-NEXT: mov w9, #46 // =0x2e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #47, .LBB6_50
; CHECK-NEXT: .LBB6_113: // %cond.load185
; CHECK-NEXT: mov w9, #47 // =0x2f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #48, .LBB6_51
; CHECK-NEXT: .LBB6_114: // %cond.load189
; CHECK-NEXT: mov w9, #48 // =0x30
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #49, .LBB6_52
; CHECK-NEXT: .LBB6_115: // %cond.load193
; CHECK-NEXT: mov w9, #49 // =0x31
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #50, .LBB6_53
; CHECK-NEXT: .LBB6_116: // %cond.load197
; CHECK-NEXT: mov w9, #50 // =0x32
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #51, .LBB6_54
; CHECK-NEXT: .LBB6_117: // %cond.load201
; CHECK-NEXT: mov w9, #51 // =0x33
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #52, .LBB6_55
; CHECK-NEXT: .LBB6_118: // %cond.load205
; CHECK-NEXT: mov w9, #52 // =0x34
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #53, .LBB6_56
; CHECK-NEXT: .LBB6_119: // %cond.load209
; CHECK-NEXT: mov w9, #53 // =0x35
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #54, .LBB6_57
; CHECK-NEXT: .LBB6_120: // %cond.load213
; CHECK-NEXT: mov w9, #54 // =0x36
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #55, .LBB6_58
; CHECK-NEXT: .LBB6_121: // %cond.load217
; CHECK-NEXT: mov w9, #55 // =0x37
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #56, .LBB6_59
; CHECK-NEXT: .LBB6_122: // %cond.load221
; CHECK-NEXT: mov w9, #56 // =0x38
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #57, .LBB6_60
; CHECK-NEXT: .LBB6_123: // %cond.load225
; CHECK-NEXT: mov w9, #57 // =0x39
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #58, .LBB6_61
; CHECK-NEXT: .LBB6_124: // %cond.load229
; CHECK-NEXT: mov w9, #58 // =0x3a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #59, .LBB6_62
; CHECK-NEXT: .LBB6_125: // %cond.load233
; CHECK-NEXT: mov w9, #59 // =0x3b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #60, .LBB6_63
; CHECK-NEXT: .LBB6_126: // %cond.load237
; CHECK-NEXT: mov w9, #60 // =0x3c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #61, .LBB6_64
; CHECK-NEXT: .LBB6_127: // %cond.load241
; CHECK-NEXT: mov w9, #61 // =0x3d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbz x8, #62, .LBB6_65
; CHECK-NEXT: .LBB6_128: // %cond.load245
; CHECK-NEXT: mov w9, #62 // =0x3e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; CHECK-NEXT: ldr s1, [x0], #4
; CHECK-NEXT: mov z0.s, p2/m, s1
; CHECK-NEXT: tbnz x8, #63, .LBB6_66
; CHECK-NEXT: b .LBB6_67
;
; CHECK-EXPAND-LABEL: masked_load_v64f32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl64
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%a = load <64 x float>, ptr %ap
%b = load <64 x float>, ptr %bp
%mask = fcmp oeq <64 x float> %a, %b
%load = call <64 x float> @llvm.masked.expandload.v64f32(ptr %ap, <64 x i1> %mask, <64 x float> zeroinitializer)
store <64 x float> %load, ptr %c
ret void
}
define void @masked_load_v64i8(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #128
; VBITS_GE_256-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 128
; VBITS_GE_256-NEXT: .cfi_offset w19, -8
; VBITS_GE_256-NEXT: .cfi_offset w20, -16
; VBITS_GE_256-NEXT: .cfi_offset w21, -24
; VBITS_GE_256-NEXT: .cfi_offset w22, -32
; VBITS_GE_256-NEXT: .cfi_offset w23, -40
; VBITS_GE_256-NEXT: .cfi_offset w24, -48
; VBITS_GE_256-NEXT: .cfi_offset w25, -56
; VBITS_GE_256-NEXT: .cfi_offset w26, -64
; VBITS_GE_256-NEXT: .cfi_offset w27, -72
; VBITS_GE_256-NEXT: .cfi_offset w28, -80
; VBITS_GE_256-NEXT: .cfi_offset w30, -88
; VBITS_GE_256-NEXT: .cfi_offset w29, -96
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: umov w13, v0.b[1]
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: umov w16, v0.b[3]
; VBITS_GE_256-NEXT: umov w12, v0.b[4]
; VBITS_GE_256-NEXT: umov w11, v0.b[5]
; VBITS_GE_256-NEXT: umov w14, v0.b[7]
; VBITS_GE_256-NEXT: umov w15, v0.b[8]
; VBITS_GE_256-NEXT: umov w9, v0.b[6]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: umov w17, v0.b[9]
; VBITS_GE_256-NEXT: mov z16.b, z0.b[17]
; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1
; VBITS_GE_256-NEXT: umov w18, v0.b[10]
; VBITS_GE_256-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_256-NEXT: umov w1, v0.b[11]
; VBITS_GE_256-NEXT: umov w3, v0.b[12]
; VBITS_GE_256-NEXT: mov z17.b, z0.b[18]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: mov z18.b, z0.b[19]
; VBITS_GE_256-NEXT: fmov w19, s16
; VBITS_GE_256-NEXT: ubfiz w22, w14, #7, #1
; VBITS_GE_256-NEXT: ubfiz w23, w15, #8, #1
; VBITS_GE_256-NEXT: umov w4, v0.b[13]
; VBITS_GE_256-NEXT: bfi w8, w16, #3, #1
; VBITS_GE_256-NEXT: mov z19.b, z0.b[20]
; VBITS_GE_256-NEXT: mov z25.b, z0.b[30]
; VBITS_GE_256-NEXT: ubfiz w17, w17, #9, #1
; VBITS_GE_256-NEXT: umov w5, v0.b[14]
; VBITS_GE_256-NEXT: mov z20.b, z0.b[21]
; VBITS_GE_256-NEXT: bfi w8, w12, #4, #1
; VBITS_GE_256-NEXT: ubfiz w10, w18, #10, #1
; VBITS_GE_256-NEXT: orr w18, w22, w23
; VBITS_GE_256-NEXT: mov z21.b, z0.b[22]
; VBITS_GE_256-NEXT: str w19, [sp, #16] // 4-byte Spill
; VBITS_GE_256-NEXT: fmov w19, s17
; VBITS_GE_256-NEXT: bfi w8, w11, #5, #1
; VBITS_GE_256-NEXT: fmov w20, s18
; VBITS_GE_256-NEXT: ubfiz w1, w1, #11, #1
; VBITS_GE_256-NEXT: orr w17, w18, w17
; VBITS_GE_256-NEXT: mov z22.b, z0.b[23]
; VBITS_GE_256-NEXT: fmov w21, s19
; VBITS_GE_256-NEXT: bfi w8, w9, #6, #1
; VBITS_GE_256-NEXT: fmov w9, s24
; VBITS_GE_256-NEXT: ubfiz w16, w3, #12, #1
; VBITS_GE_256-NEXT: orr w10, w17, w10
; VBITS_GE_256-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z7.b, z0.b[16]
; VBITS_GE_256-NEXT: mov z3.b, z0.b[24]
; VBITS_GE_256-NEXT: fmov w13, s20
; VBITS_GE_256-NEXT: ubfiz w18, w4, #13, #1
; VBITS_GE_256-NEXT: str w9, [sp, #8] // 4-byte Spill
; VBITS_GE_256-NEXT: fmov w9, s25
; VBITS_GE_256-NEXT: orr w10, w10, w1
; VBITS_GE_256-NEXT: mov z4.b, z0.b[25]
; VBITS_GE_256-NEXT: fmov w14, s21
; VBITS_GE_256-NEXT: ubfiz w17, w5, #14, #1
; VBITS_GE_256-NEXT: ubfiz w1, w19, #18, #1
; VBITS_GE_256-NEXT: orr w10, w10, w16
; VBITS_GE_256-NEXT: ubfiz w16, w20, #19, #1
; VBITS_GE_256-NEXT: fmov w15, s22
; VBITS_GE_256-NEXT: orr w10, w10, w18
; VBITS_GE_256-NEXT: ubfiz w24, w21, #20, #1
; VBITS_GE_256-NEXT: str w9, [sp, #20] // 4-byte Spill
; VBITS_GE_256-NEXT: umov w9, v1.b[6]
; VBITS_GE_256-NEXT: fmov w7, s7
; VBITS_GE_256-NEXT: orr w17, w10, w17
; VBITS_GE_256-NEXT: orr w10, w1, w16
; VBITS_GE_256-NEXT: fmov w22, s3
; VBITS_GE_256-NEXT: ubfiz w13, w13, #21, #1
; VBITS_GE_256-NEXT: umov w6, v0.b[15]
; VBITS_GE_256-NEXT: fmov w21, s4
; VBITS_GE_256-NEXT: orr w24, w10, w24
; VBITS_GE_256-NEXT: ubfiz w14, w14, #22, #1
; VBITS_GE_256-NEXT: ubfiz w15, w15, #23, #1
; VBITS_GE_256-NEXT: orr w24, w24, w13
; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1
; VBITS_GE_256-NEXT: umov w7, v1.b[1]
; VBITS_GE_256-NEXT: str w9, [sp, #12] // 4-byte Spill
; VBITS_GE_256-NEXT: fmov w9, s1
; VBITS_GE_256-NEXT: orr w14, w24, w14
; VBITS_GE_256-NEXT: ubfiz w22, w22, #24, #1
; VBITS_GE_256-NEXT: mov z5.b, z0.b[26]
; VBITS_GE_256-NEXT: orr w14, w14, w15
; VBITS_GE_256-NEXT: ubfiz w15, w21, #25, #1
; VBITS_GE_256-NEXT: mov z6.b, z0.b[27]
; VBITS_GE_256-NEXT: ubfiz w12, w6, #15, #1
; VBITS_GE_256-NEXT: umov w6, v1.b[2]
; VBITS_GE_256-NEXT: orr w14, w14, w22
; VBITS_GE_256-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_256-NEXT: orr w15, w14, w15
; VBITS_GE_256-NEXT: and w14, w9, #0x1
; VBITS_GE_256-NEXT: fmov w3, s5
; VBITS_GE_256-NEXT: umov w4, v1.b[3]
; VBITS_GE_256-NEXT: umov w28, v1.b[8]
; VBITS_GE_256-NEXT: bfi w14, w7, #1, #1
; VBITS_GE_256-NEXT: fmov w18, s6
; VBITS_GE_256-NEXT: umov w26, v1.b[7]
; VBITS_GE_256-NEXT: orr w12, w17, w12
; VBITS_GE_256-NEXT: fmov w17, s23
; VBITS_GE_256-NEXT: umov w1, v1.b[4]
; VBITS_GE_256-NEXT: umov w29, v1.b[9]
; VBITS_GE_256-NEXT: bfi w14, w6, #2, #1
; VBITS_GE_256-NEXT: orr w5, w12, w11
; VBITS_GE_256-NEXT: umov w30, v1.b[10]
; VBITS_GE_256-NEXT: mov z7.b, z1.b[18]
; VBITS_GE_256-NEXT: mov z16.b, z1.b[19]
; VBITS_GE_256-NEXT: ldr w11, [sp, #16] // 4-byte Reload
; VBITS_GE_256-NEXT: ubfiz w3, w3, #26, #1
; VBITS_GE_256-NEXT: umov w16, v1.b[5]
; VBITS_GE_256-NEXT: umov w23, v1.b[11]
; VBITS_GE_256-NEXT: mov z17.b, z1.b[20]
; VBITS_GE_256-NEXT: ubfiz w18, w18, #27, #1
; VBITS_GE_256-NEXT: bfi w14, w4, #3, #1
; VBITS_GE_256-NEXT: ubfiz w7, w28, #8, #1
; VBITS_GE_256-NEXT: ldr w28, [sp, #8] // 4-byte Reload
; VBITS_GE_256-NEXT: umov w27, v1.b[12]
; VBITS_GE_256-NEXT: mov z18.b, z1.b[21]
; VBITS_GE_256-NEXT: ubfiz w11, w11, #17, #1
; VBITS_GE_256-NEXT: orr w15, w15, w3
; VBITS_GE_256-NEXT: ubfiz w17, w17, #28, #1
; VBITS_GE_256-NEXT: ubfiz w4, w26, #7, #1
; VBITS_GE_256-NEXT: umov w25, v1.b[13]
; VBITS_GE_256-NEXT: mov z19.b, z1.b[22]
; VBITS_GE_256-NEXT: fmov w24, s7
; VBITS_GE_256-NEXT: fmov w21, s16
; VBITS_GE_256-NEXT: orr w15, w15, w18
; VBITS_GE_256-NEXT: ubfiz w28, w28, #29, #1
; VBITS_GE_256-NEXT: ubfiz w29, w29, #9, #1
; VBITS_GE_256-NEXT: bfi w14, w1, #4, #1
; VBITS_GE_256-NEXT: mov z20.b, z1.b[23]
; VBITS_GE_256-NEXT: fmov w22, s17
; VBITS_GE_256-NEXT: orr w11, w5, w11
; VBITS_GE_256-NEXT: orr w15, w15, w17
; VBITS_GE_256-NEXT: orr w1, w4, w7
; VBITS_GE_256-NEXT: ubfiz w4, w30, #10, #1
; VBITS_GE_256-NEXT: fmov w5, s18
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: orr w11, w15, w28
; VBITS_GE_256-NEXT: orr w15, w1, w29
; VBITS_GE_256-NEXT: bfi w14, w16, #5, #1
; VBITS_GE_256-NEXT: ubfiz w16, w23, #11, #1
; VBITS_GE_256-NEXT: mov z21.b, z1.b[24]
; VBITS_GE_256-NEXT: mov z22.b, z1.b[25]
; VBITS_GE_256-NEXT: fmov w12, s19
; VBITS_GE_256-NEXT: orr w15, w15, w4
; VBITS_GE_256-NEXT: ubfiz w1, w27, #12, #1
; VBITS_GE_256-NEXT: ubfiz w4, w24, #18, #1
; VBITS_GE_256-NEXT: ubfiz w7, w21, #19, #1
; VBITS_GE_256-NEXT: umov w19, v1.b[14]
; VBITS_GE_256-NEXT: fmov w9, s20
; VBITS_GE_256-NEXT: orr w15, w15, w16
; VBITS_GE_256-NEXT: ubfiz w16, w25, #13, #1
; VBITS_GE_256-NEXT: ubfiz w21, w22, #20, #1
; VBITS_GE_256-NEXT: umov w20, v1.b[15]
; VBITS_GE_256-NEXT: mov z23.b, z1.b[26]
; VBITS_GE_256-NEXT: orr w15, w15, w1
; VBITS_GE_256-NEXT: orr w1, w4, w7
; VBITS_GE_256-NEXT: ubfiz w4, w5, #21, #1
; VBITS_GE_256-NEXT: mov z24.b, z1.b[27]
; VBITS_GE_256-NEXT: fmov w3, s21
; VBITS_GE_256-NEXT: fmov w18, s22
; VBITS_GE_256-NEXT: orr w15, w15, w16
; VBITS_GE_256-NEXT: orr w16, w1, w21
; VBITS_GE_256-NEXT: ubfiz w12, w12, #22, #1
; VBITS_GE_256-NEXT: orr w16, w16, w4
; VBITS_GE_256-NEXT: ubfiz w9, w9, #23, #1
; VBITS_GE_256-NEXT: mov z3.b, z1.b[29]
; VBITS_GE_256-NEXT: fmov w6, s23
; VBITS_GE_256-NEXT: ubfiz w1, w19, #14, #1
; VBITS_GE_256-NEXT: orr w12, w16, w12
; VBITS_GE_256-NEXT: mov z5.b, z1.b[16]
; VBITS_GE_256-NEXT: mov z25.b, z1.b[28]
; VBITS_GE_256-NEXT: fmov w17, s24
; VBITS_GE_256-NEXT: ubfiz w16, w20, #15, #1
; VBITS_GE_256-NEXT: ubfiz w3, w3, #24, #1
; VBITS_GE_256-NEXT: orr w9, w12, w9
; VBITS_GE_256-NEXT: ubfiz w12, w18, #25, #1
; VBITS_GE_256-NEXT: ldr w18, [sp, #12] // 4-byte Reload
; VBITS_GE_256-NEXT: mov z6.b, z1.b[17]
; VBITS_GE_256-NEXT: orr w15, w15, w1
; VBITS_GE_256-NEXT: mov z4.b, z1.b[30]
; VBITS_GE_256-NEXT: orr w9, w9, w3
; VBITS_GE_256-NEXT: bfi w14, w18, #6, #1
; VBITS_GE_256-NEXT: orr w15, w15, w16
; VBITS_GE_256-NEXT: ubfiz w16, w6, #26, #1
; VBITS_GE_256-NEXT: fmov w18, s3
; VBITS_GE_256-NEXT: fmov w10, s5
; VBITS_GE_256-NEXT: fmov w26, s25
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: ubfiz w17, w17, #27, #1
; VBITS_GE_256-NEXT: fmov w13, s6
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: ldr w12, [sp, #20] // 4-byte Reload
; VBITS_GE_256-NEXT: mov z0.b, z0.b[31]
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: ubfiz w17, w18, #29, #1
; VBITS_GE_256-NEXT: fmov w18, s4
; VBITS_GE_256-NEXT: ubfiz w10, w10, #16, #1
; VBITS_GE_256-NEXT: ubfiz w16, w26, #28, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #17, #1
; VBITS_GE_256-NEXT: mov z2.b, z1.b[31]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #30, #1
; VBITS_GE_256-NEXT: ptrue p1.b
; VBITS_GE_256-NEXT: orr w10, w15, w10
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: ubfiz w15, w18, #30, #1
; VBITS_GE_256-NEXT: orr w10, w10, w13
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w10, w14, w10
; VBITS_GE_256-NEXT: orr w9, w9, w15
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: fmov w11, s0
; VBITS_GE_256-NEXT: orr w9, w10, w9
; VBITS_GE_256-NEXT: fmov w10, s2
; VBITS_GE_256-NEXT: orr w8, w8, w11, lsl #31
; VBITS_GE_256-NEXT: orr w9, w9, w10, lsl #31
; VBITS_GE_256-NEXT: orr x8, x9, x8, lsl #32
; VBITS_GE_256-NEXT: adrp x9, .LCPI7_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI7_0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB7_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rb { z2.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: add x0, x0, #1
; VBITS_GE_256-NEXT: mov z0.d, z2.d
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB7_3
; VBITS_GE_256-NEXT: b .LBB7_4
; VBITS_GE_256-NEXT: .LBB7_2:
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB7_4
; VBITS_GE_256-NEXT: .LBB7_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: .LBB7_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB7_68
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB7_69
; VBITS_GE_256-NEXT: .LBB7_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB7_70
; VBITS_GE_256-NEXT: .LBB7_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB7_71
; VBITS_GE_256-NEXT: .LBB7_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB7_72
; VBITS_GE_256-NEXT: .LBB7_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB7_73
; VBITS_GE_256-NEXT: .LBB7_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB7_74
; VBITS_GE_256-NEXT: .LBB7_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB7_75
; VBITS_GE_256-NEXT: .LBB7_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB7_76
; VBITS_GE_256-NEXT: .LBB7_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB7_77
; VBITS_GE_256-NEXT: .LBB7_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB7_78
; VBITS_GE_256-NEXT: .LBB7_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB7_79
; VBITS_GE_256-NEXT: .LBB7_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB7_80
; VBITS_GE_256-NEXT: .LBB7_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB7_81
; VBITS_GE_256-NEXT: .LBB7_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB7_82
; VBITS_GE_256-NEXT: .LBB7_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB7_83
; VBITS_GE_256-NEXT: .LBB7_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB7_84
; VBITS_GE_256-NEXT: .LBB7_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB7_85
; VBITS_GE_256-NEXT: .LBB7_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB7_86
; VBITS_GE_256-NEXT: .LBB7_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB7_87
; VBITS_GE_256-NEXT: .LBB7_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB7_88
; VBITS_GE_256-NEXT: .LBB7_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB7_89
; VBITS_GE_256-NEXT: .LBB7_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB7_90
; VBITS_GE_256-NEXT: .LBB7_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB7_91
; VBITS_GE_256-NEXT: .LBB7_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB7_92
; VBITS_GE_256-NEXT: .LBB7_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB7_93
; VBITS_GE_256-NEXT: .LBB7_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB7_94
; VBITS_GE_256-NEXT: .LBB7_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB7_95
; VBITS_GE_256-NEXT: .LBB7_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB7_96
; VBITS_GE_256-NEXT: .LBB7_33: // %else118
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB7_97
; VBITS_GE_256-NEXT: .LBB7_34: // %else122
; VBITS_GE_256-NEXT: tbnz x8, #32, .LBB7_98
; VBITS_GE_256-NEXT: .LBB7_35: // %else126
; VBITS_GE_256-NEXT: tbnz x8, #33, .LBB7_99
; VBITS_GE_256-NEXT: .LBB7_36: // %else130
; VBITS_GE_256-NEXT: tbnz x8, #34, .LBB7_100
; VBITS_GE_256-NEXT: .LBB7_37: // %else134
; VBITS_GE_256-NEXT: tbnz x8, #35, .LBB7_101
; VBITS_GE_256-NEXT: .LBB7_38: // %else138
; VBITS_GE_256-NEXT: tbnz x8, #36, .LBB7_102
; VBITS_GE_256-NEXT: .LBB7_39: // %else142
; VBITS_GE_256-NEXT: tbnz x8, #37, .LBB7_103
; VBITS_GE_256-NEXT: .LBB7_40: // %else146
; VBITS_GE_256-NEXT: tbnz x8, #38, .LBB7_104
; VBITS_GE_256-NEXT: .LBB7_41: // %else150
; VBITS_GE_256-NEXT: tbnz x8, #39, .LBB7_105
; VBITS_GE_256-NEXT: .LBB7_42: // %else154
; VBITS_GE_256-NEXT: tbnz x8, #40, .LBB7_106
; VBITS_GE_256-NEXT: .LBB7_43: // %else158
; VBITS_GE_256-NEXT: tbnz x8, #41, .LBB7_107
; VBITS_GE_256-NEXT: .LBB7_44: // %else162
; VBITS_GE_256-NEXT: tbnz x8, #42, .LBB7_108
; VBITS_GE_256-NEXT: .LBB7_45: // %else166
; VBITS_GE_256-NEXT: tbnz x8, #43, .LBB7_109
; VBITS_GE_256-NEXT: .LBB7_46: // %else170
; VBITS_GE_256-NEXT: tbnz x8, #44, .LBB7_110
; VBITS_GE_256-NEXT: .LBB7_47: // %else174
; VBITS_GE_256-NEXT: tbnz x8, #45, .LBB7_111
; VBITS_GE_256-NEXT: .LBB7_48: // %else178
; VBITS_GE_256-NEXT: tbnz x8, #46, .LBB7_112
; VBITS_GE_256-NEXT: .LBB7_49: // %else182
; VBITS_GE_256-NEXT: tbnz x8, #47, .LBB7_113
; VBITS_GE_256-NEXT: .LBB7_50: // %else186
; VBITS_GE_256-NEXT: tbnz x8, #48, .LBB7_114
; VBITS_GE_256-NEXT: .LBB7_51: // %else190
; VBITS_GE_256-NEXT: tbnz x8, #49, .LBB7_115
; VBITS_GE_256-NEXT: .LBB7_52: // %else194
; VBITS_GE_256-NEXT: tbnz x8, #50, .LBB7_116
; VBITS_GE_256-NEXT: .LBB7_53: // %else198
; VBITS_GE_256-NEXT: tbnz x8, #51, .LBB7_117
; VBITS_GE_256-NEXT: .LBB7_54: // %else202
; VBITS_GE_256-NEXT: tbnz x8, #52, .LBB7_118
; VBITS_GE_256-NEXT: .LBB7_55: // %else206
; VBITS_GE_256-NEXT: tbnz x8, #53, .LBB7_119
; VBITS_GE_256-NEXT: .LBB7_56: // %else210
; VBITS_GE_256-NEXT: tbnz x8, #54, .LBB7_120
; VBITS_GE_256-NEXT: .LBB7_57: // %else214
; VBITS_GE_256-NEXT: tbnz x8, #55, .LBB7_121
; VBITS_GE_256-NEXT: .LBB7_58: // %else218
; VBITS_GE_256-NEXT: tbnz x8, #56, .LBB7_122
; VBITS_GE_256-NEXT: .LBB7_59: // %else222
; VBITS_GE_256-NEXT: tbnz x8, #57, .LBB7_123
; VBITS_GE_256-NEXT: .LBB7_60: // %else226
; VBITS_GE_256-NEXT: tbnz x8, #58, .LBB7_124
; VBITS_GE_256-NEXT: .LBB7_61: // %else230
; VBITS_GE_256-NEXT: tbnz x8, #59, .LBB7_125
; VBITS_GE_256-NEXT: .LBB7_62: // %else234
; VBITS_GE_256-NEXT: tbnz x8, #60, .LBB7_126
; VBITS_GE_256-NEXT: .LBB7_63: // %else238
; VBITS_GE_256-NEXT: tbnz x8, #61, .LBB7_127
; VBITS_GE_256-NEXT: .LBB7_64: // %else242
; VBITS_GE_256-NEXT: tbnz x8, #62, .LBB7_128
; VBITS_GE_256-NEXT: .LBB7_65: // %else246
; VBITS_GE_256-NEXT: tbz x8, #63, .LBB7_67
; VBITS_GE_256-NEXT: .LBB7_66: // %cond.load249
; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w8
; VBITS_GE_256-NEXT: ldrb w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w8
; VBITS_GE_256-NEXT: .LBB7_67: // %else250
; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2, x8]
; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x2]
; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: add sp, sp, #128
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB7_68: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB7_6
; VBITS_GE_256-NEXT: .LBB7_69: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB7_7
; VBITS_GE_256-NEXT: .LBB7_70: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB7_8
; VBITS_GE_256-NEXT: .LBB7_71: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB7_9
; VBITS_GE_256-NEXT: .LBB7_72: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB7_10
; VBITS_GE_256-NEXT: .LBB7_73: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB7_11
; VBITS_GE_256-NEXT: .LBB7_74: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB7_12
; VBITS_GE_256-NEXT: .LBB7_75: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB7_13
; VBITS_GE_256-NEXT: .LBB7_76: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB7_14
; VBITS_GE_256-NEXT: .LBB7_77: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB7_15
; VBITS_GE_256-NEXT: .LBB7_78: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB7_16
; VBITS_GE_256-NEXT: .LBB7_79: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB7_17
; VBITS_GE_256-NEXT: .LBB7_80: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB7_18
; VBITS_GE_256-NEXT: .LBB7_81: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB7_19
; VBITS_GE_256-NEXT: .LBB7_82: // %cond.load61
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB7_20
; VBITS_GE_256-NEXT: .LBB7_83: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB7_21
; VBITS_GE_256-NEXT: .LBB7_84: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB7_22
; VBITS_GE_256-NEXT: .LBB7_85: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB7_23
; VBITS_GE_256-NEXT: .LBB7_86: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB7_24
; VBITS_GE_256-NEXT: .LBB7_87: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB7_25
; VBITS_GE_256-NEXT: .LBB7_88: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB7_26
; VBITS_GE_256-NEXT: .LBB7_89: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB7_27
; VBITS_GE_256-NEXT: .LBB7_90: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB7_28
; VBITS_GE_256-NEXT: .LBB7_91: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB7_29
; VBITS_GE_256-NEXT: .LBB7_92: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB7_30
; VBITS_GE_256-NEXT: .LBB7_93: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB7_31
; VBITS_GE_256-NEXT: .LBB7_94: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB7_32
; VBITS_GE_256-NEXT: .LBB7_95: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB7_33
; VBITS_GE_256-NEXT: .LBB7_96: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB7_34
; VBITS_GE_256-NEXT: .LBB7_97: // %cond.load121
; VBITS_GE_256-NEXT: mov w9, #31 // =0x1f
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #32, .LBB7_35
; VBITS_GE_256-NEXT: .LBB7_98: // %cond.load125
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: ptrue p2.b, vl1
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #33, .LBB7_36
; VBITS_GE_256-NEXT: .LBB7_99: // %cond.load129
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #34, .LBB7_37
; VBITS_GE_256-NEXT: .LBB7_100: // %cond.load133
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #35, .LBB7_38
; VBITS_GE_256-NEXT: .LBB7_101: // %cond.load137
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #36, .LBB7_39
; VBITS_GE_256-NEXT: .LBB7_102: // %cond.load141
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #37, .LBB7_40
; VBITS_GE_256-NEXT: .LBB7_103: // %cond.load145
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #38, .LBB7_41
; VBITS_GE_256-NEXT: .LBB7_104: // %cond.load149
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #39, .LBB7_42
; VBITS_GE_256-NEXT: .LBB7_105: // %cond.load153
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #40, .LBB7_43
; VBITS_GE_256-NEXT: .LBB7_106: // %cond.load157
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #41, .LBB7_44
; VBITS_GE_256-NEXT: .LBB7_107: // %cond.load161
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #42, .LBB7_45
; VBITS_GE_256-NEXT: .LBB7_108: // %cond.load165
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #43, .LBB7_46
; VBITS_GE_256-NEXT: .LBB7_109: // %cond.load169
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #44, .LBB7_47
; VBITS_GE_256-NEXT: .LBB7_110: // %cond.load173
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #45, .LBB7_48
; VBITS_GE_256-NEXT: .LBB7_111: // %cond.load177
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #46, .LBB7_49
; VBITS_GE_256-NEXT: .LBB7_112: // %cond.load181
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #47, .LBB7_50
; VBITS_GE_256-NEXT: .LBB7_113: // %cond.load185
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #48, .LBB7_51
; VBITS_GE_256-NEXT: .LBB7_114: // %cond.load189
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #49, .LBB7_52
; VBITS_GE_256-NEXT: .LBB7_115: // %cond.load193
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #50, .LBB7_53
; VBITS_GE_256-NEXT: .LBB7_116: // %cond.load197
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #51, .LBB7_54
; VBITS_GE_256-NEXT: .LBB7_117: // %cond.load201
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #52, .LBB7_55
; VBITS_GE_256-NEXT: .LBB7_118: // %cond.load205
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #53, .LBB7_56
; VBITS_GE_256-NEXT: .LBB7_119: // %cond.load209
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #54, .LBB7_57
; VBITS_GE_256-NEXT: .LBB7_120: // %cond.load213
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #55, .LBB7_58
; VBITS_GE_256-NEXT: .LBB7_121: // %cond.load217
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #56, .LBB7_59
; VBITS_GE_256-NEXT: .LBB7_122: // %cond.load221
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #57, .LBB7_60
; VBITS_GE_256-NEXT: .LBB7_123: // %cond.load225
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #58, .LBB7_61
; VBITS_GE_256-NEXT: .LBB7_124: // %cond.load229
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #59, .LBB7_62
; VBITS_GE_256-NEXT: .LBB7_125: // %cond.load233
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #60, .LBB7_63
; VBITS_GE_256-NEXT: .LBB7_126: // %cond.load237
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #61, .LBB7_64
; VBITS_GE_256-NEXT: .LBB7_127: // %cond.load241
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz x8, #62, .LBB7_65
; VBITS_GE_256-NEXT: .LBB7_128: // %cond.load245
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z2.b, #0, #1
; VBITS_GE_256-NEXT: mov z3.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z2.b, z3.b
; VBITS_GE_256-NEXT: mov z1.b, p2/m, w9
; VBITS_GE_256-NEXT: tbnz x8, #63, .LBB7_66
; VBITS_GE_256-NEXT: b .LBB7_67
;
; VBITS_GE_512-LABEL: masked_load_v64i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_512-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.b
; VBITS_GE_512-NEXT: umov w11, v0.b[1]
; VBITS_GE_512-NEXT: fmov w22, s0
; VBITS_GE_512-NEXT: umov w12, v0.b[2]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[7]
; VBITS_GE_512-NEXT: umov w1, v0.b[8]
; VBITS_GE_512-NEXT: umov w16, v0.b[9]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z5.b, z0.b[19]
; VBITS_GE_512-NEXT: and x22, x22, #0x1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: umov w17, v0.b[10]
; VBITS_GE_512-NEXT: bfi x22, x11, #1, #1
; VBITS_GE_512-NEXT: mov z6.b, z0.b[20]
; VBITS_GE_512-NEXT: umov w3, v0.b[11]
; VBITS_GE_512-NEXT: mov z4.b, z0.b[21]
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[22]
; VBITS_GE_512-NEXT: bfi x22, x12, #2, #1
; VBITS_GE_512-NEXT: fmov w19, s3
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: ubfiz x14, x14, #7, #1
; VBITS_GE_512-NEXT: ubfiz x1, x1, #8, #1
; VBITS_GE_512-NEXT: umov w4, v0.b[12]
; VBITS_GE_512-NEXT: bfi x22, x13, #3, #1
; VBITS_GE_512-NEXT: mov z16.b, z0.b[23]
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: ubfiz x16, x16, #9, #1
; VBITS_GE_512-NEXT: umov w8, v0.b[6]
; VBITS_GE_512-NEXT: umov w5, v0.b[13]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w23, s4
; VBITS_GE_512-NEXT: orr x14, x14, x1
; VBITS_GE_512-NEXT: bfi x22, x10, #4, #1
; VBITS_GE_512-NEXT: ubfiz x10, x17, #10, #1
; VBITS_GE_512-NEXT: mov z18.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w24, s7
; VBITS_GE_512-NEXT: ubfiz x13, x19, #18, #1
; VBITS_GE_512-NEXT: ubfiz x19, x20, #19, #1
; VBITS_GE_512-NEXT: orr x14, x14, x16
; VBITS_GE_512-NEXT: ubfiz x16, x3, #11, #1
; VBITS_GE_512-NEXT: umov w15, v0.b[14]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w25, s16
; VBITS_GE_512-NEXT: ubfiz x1, x21, #20, #1
; VBITS_GE_512-NEXT: orr x10, x14, x10
; VBITS_GE_512-NEXT: bfi x22, x9, #5, #1
; VBITS_GE_512-NEXT: mov z20.b, z0.b[27]
; VBITS_GE_512-NEXT: fmov w26, s17
; VBITS_GE_512-NEXT: orr x13, x13, x19
; VBITS_GE_512-NEXT: ubfiz x9, x4, #12, #1
; VBITS_GE_512-NEXT: orr x10, x10, x16
; VBITS_GE_512-NEXT: ubfiz x16, x23, #21, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[15]
; VBITS_GE_512-NEXT: mov z1.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z21.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w11, s18
; VBITS_GE_512-NEXT: orr x13, x13, x1
; VBITS_GE_512-NEXT: ubfiz x14, x5, #13, #1
; VBITS_GE_512-NEXT: bfi x22, x8, #6, #1
; VBITS_GE_512-NEXT: ubfiz x8, x24, #22, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z22.b, z0.b[29]
; VBITS_GE_512-NEXT: fmov w27, s19
; VBITS_GE_512-NEXT: orr x9, x10, x9
; VBITS_GE_512-NEXT: orr x10, x13, x16
; VBITS_GE_512-NEXT: ubfiz x13, x25, #23, #1
; VBITS_GE_512-NEXT: mov z5.b, z0.b[30]
; VBITS_GE_512-NEXT: fmov w28, s20
; VBITS_GE_512-NEXT: orr x9, x9, x14
; VBITS_GE_512-NEXT: orr x8, x10, x8
; VBITS_GE_512-NEXT: ubfiz x10, x15, #14, #1
; VBITS_GE_512-NEXT: ubfiz x14, x26, #24, #1
; VBITS_GE_512-NEXT: fmov w6, s1
; VBITS_GE_512-NEXT: fmov w29, s21
; VBITS_GE_512-NEXT: orr x8, x8, x13
; VBITS_GE_512-NEXT: ubfiz x11, x11, #25, #1
; VBITS_GE_512-NEXT: fmov w7, s2
; VBITS_GE_512-NEXT: fmov w30, s22
; VBITS_GE_512-NEXT: ubfiz x13, x18, #15, #1
; VBITS_GE_512-NEXT: orr x9, x9, x10
; VBITS_GE_512-NEXT: orr x8, x8, x14
; VBITS_GE_512-NEXT: ubfiz x10, x27, #26, #1
; VBITS_GE_512-NEXT: fmov w12, s5
; VBITS_GE_512-NEXT: orr x8, x8, x11
; VBITS_GE_512-NEXT: ubfiz x11, x28, #27, #1
; VBITS_GE_512-NEXT: mov z3.b, z0.b[31]
; VBITS_GE_512-NEXT: orr x9, x9, x13
; VBITS_GE_512-NEXT: orr x8, x8, x10
; VBITS_GE_512-NEXT: ubfiz x10, x6, #16, #1
; VBITS_GE_512-NEXT: ubfiz x13, x29, #28, #1
; VBITS_GE_512-NEXT: orr x8, x8, x11
; VBITS_GE_512-NEXT: ubfiz x11, x7, #17, #1
; VBITS_GE_512-NEXT: ubfiz x14, x30, #29, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[32]
; VBITS_GE_512-NEXT: orr x9, x9, x10
; VBITS_GE_512-NEXT: orr x8, x8, x13
; VBITS_GE_512-NEXT: ubfiz x10, x12, #30, #1
; VBITS_GE_512-NEXT: fmov w12, s3
; VBITS_GE_512-NEXT: orr x9, x9, x11
; VBITS_GE_512-NEXT: orr x8, x8, x14
; VBITS_GE_512-NEXT: mov z1.b, z0.b[33]
; VBITS_GE_512-NEXT: orr x9, x22, x9
; VBITS_GE_512-NEXT: orr x8, x8, x10
; VBITS_GE_512-NEXT: orr x8, x9, x8
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: lsl w10, w12, #31
; VBITS_GE_512-NEXT: mov z2.b, z0.b[34]
; VBITS_GE_512-NEXT: orr x8, x8, x10
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #32
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[35]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #33
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[36]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #34
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[37]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #35
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[38]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #36
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[39]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #37
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[40]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #38
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[41]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #39
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[42]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #40
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[43]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #41
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[44]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #42
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[45]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #43
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[46]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #44
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[47]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #45
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[48]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #46
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[49]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #47
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[50]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #48
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[51]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #49
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[52]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #50
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[53]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #51
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[54]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #52
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[55]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #53
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[56]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #54
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[57]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #55
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[58]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #56
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[59]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #57
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[60]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #58
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[61]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: fmov w10, s1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #59
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: mov z2.b, z0.b[62]
; VBITS_GE_512-NEXT: mov z0.b, z0.b[63]
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #60
; VBITS_GE_512-NEXT: and w9, w10, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #61
; VBITS_GE_512-NEXT: fmov w9, s2
; VBITS_GE_512-NEXT: and w9, w9, #0x1
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #62
; VBITS_GE_512-NEXT: fmov w9, s0
; VBITS_GE_512-NEXT: orr x8, x8, x9, lsl #63
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB7_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #1
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB7_3
; VBITS_GE_512-NEXT: b .LBB7_4
; VBITS_GE_512-NEXT: .LBB7_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI7_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI7_0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB7_4
; VBITS_GE_512-NEXT: .LBB7_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: .LBB7_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB7_68
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB7_69
; VBITS_GE_512-NEXT: .LBB7_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB7_70
; VBITS_GE_512-NEXT: .LBB7_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB7_71
; VBITS_GE_512-NEXT: .LBB7_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB7_72
; VBITS_GE_512-NEXT: .LBB7_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB7_73
; VBITS_GE_512-NEXT: .LBB7_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB7_74
; VBITS_GE_512-NEXT: .LBB7_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB7_75
; VBITS_GE_512-NEXT: .LBB7_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB7_76
; VBITS_GE_512-NEXT: .LBB7_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB7_77
; VBITS_GE_512-NEXT: .LBB7_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB7_78
; VBITS_GE_512-NEXT: .LBB7_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB7_79
; VBITS_GE_512-NEXT: .LBB7_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB7_80
; VBITS_GE_512-NEXT: .LBB7_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB7_81
; VBITS_GE_512-NEXT: .LBB7_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB7_82
; VBITS_GE_512-NEXT: .LBB7_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB7_83
; VBITS_GE_512-NEXT: .LBB7_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB7_84
; VBITS_GE_512-NEXT: .LBB7_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB7_85
; VBITS_GE_512-NEXT: .LBB7_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB7_86
; VBITS_GE_512-NEXT: .LBB7_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB7_87
; VBITS_GE_512-NEXT: .LBB7_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB7_88
; VBITS_GE_512-NEXT: .LBB7_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB7_89
; VBITS_GE_512-NEXT: .LBB7_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB7_90
; VBITS_GE_512-NEXT: .LBB7_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB7_91
; VBITS_GE_512-NEXT: .LBB7_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB7_92
; VBITS_GE_512-NEXT: .LBB7_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB7_93
; VBITS_GE_512-NEXT: .LBB7_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB7_94
; VBITS_GE_512-NEXT: .LBB7_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB7_95
; VBITS_GE_512-NEXT: .LBB7_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB7_96
; VBITS_GE_512-NEXT: .LBB7_33: // %else118
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB7_97
; VBITS_GE_512-NEXT: .LBB7_34: // %else122
; VBITS_GE_512-NEXT: tbnz x8, #32, .LBB7_98
; VBITS_GE_512-NEXT: .LBB7_35: // %else126
; VBITS_GE_512-NEXT: tbnz x8, #33, .LBB7_99
; VBITS_GE_512-NEXT: .LBB7_36: // %else130
; VBITS_GE_512-NEXT: tbnz x8, #34, .LBB7_100
; VBITS_GE_512-NEXT: .LBB7_37: // %else134
; VBITS_GE_512-NEXT: tbnz x8, #35, .LBB7_101
; VBITS_GE_512-NEXT: .LBB7_38: // %else138
; VBITS_GE_512-NEXT: tbnz x8, #36, .LBB7_102
; VBITS_GE_512-NEXT: .LBB7_39: // %else142
; VBITS_GE_512-NEXT: tbnz x8, #37, .LBB7_103
; VBITS_GE_512-NEXT: .LBB7_40: // %else146
; VBITS_GE_512-NEXT: tbnz x8, #38, .LBB7_104
; VBITS_GE_512-NEXT: .LBB7_41: // %else150
; VBITS_GE_512-NEXT: tbnz x8, #39, .LBB7_105
; VBITS_GE_512-NEXT: .LBB7_42: // %else154
; VBITS_GE_512-NEXT: tbnz x8, #40, .LBB7_106
; VBITS_GE_512-NEXT: .LBB7_43: // %else158
; VBITS_GE_512-NEXT: tbnz x8, #41, .LBB7_107
; VBITS_GE_512-NEXT: .LBB7_44: // %else162
; VBITS_GE_512-NEXT: tbnz x8, #42, .LBB7_108
; VBITS_GE_512-NEXT: .LBB7_45: // %else166
; VBITS_GE_512-NEXT: tbnz x8, #43, .LBB7_109
; VBITS_GE_512-NEXT: .LBB7_46: // %else170
; VBITS_GE_512-NEXT: tbnz x8, #44, .LBB7_110
; VBITS_GE_512-NEXT: .LBB7_47: // %else174
; VBITS_GE_512-NEXT: tbnz x8, #45, .LBB7_111
; VBITS_GE_512-NEXT: .LBB7_48: // %else178
; VBITS_GE_512-NEXT: tbnz x8, #46, .LBB7_112
; VBITS_GE_512-NEXT: .LBB7_49: // %else182
; VBITS_GE_512-NEXT: tbnz x8, #47, .LBB7_113
; VBITS_GE_512-NEXT: .LBB7_50: // %else186
; VBITS_GE_512-NEXT: tbnz x8, #48, .LBB7_114
; VBITS_GE_512-NEXT: .LBB7_51: // %else190
; VBITS_GE_512-NEXT: tbnz x8, #49, .LBB7_115
; VBITS_GE_512-NEXT: .LBB7_52: // %else194
; VBITS_GE_512-NEXT: tbnz x8, #50, .LBB7_116
; VBITS_GE_512-NEXT: .LBB7_53: // %else198
; VBITS_GE_512-NEXT: tbnz x8, #51, .LBB7_117
; VBITS_GE_512-NEXT: .LBB7_54: // %else202
; VBITS_GE_512-NEXT: tbnz x8, #52, .LBB7_118
; VBITS_GE_512-NEXT: .LBB7_55: // %else206
; VBITS_GE_512-NEXT: tbnz x8, #53, .LBB7_119
; VBITS_GE_512-NEXT: .LBB7_56: // %else210
; VBITS_GE_512-NEXT: tbnz x8, #54, .LBB7_120
; VBITS_GE_512-NEXT: .LBB7_57: // %else214
; VBITS_GE_512-NEXT: tbnz x8, #55, .LBB7_121
; VBITS_GE_512-NEXT: .LBB7_58: // %else218
; VBITS_GE_512-NEXT: tbnz x8, #56, .LBB7_122
; VBITS_GE_512-NEXT: .LBB7_59: // %else222
; VBITS_GE_512-NEXT: tbnz x8, #57, .LBB7_123
; VBITS_GE_512-NEXT: .LBB7_60: // %else226
; VBITS_GE_512-NEXT: tbnz x8, #58, .LBB7_124
; VBITS_GE_512-NEXT: .LBB7_61: // %else230
; VBITS_GE_512-NEXT: tbnz x8, #59, .LBB7_125
; VBITS_GE_512-NEXT: .LBB7_62: // %else234
; VBITS_GE_512-NEXT: tbnz x8, #60, .LBB7_126
; VBITS_GE_512-NEXT: .LBB7_63: // %else238
; VBITS_GE_512-NEXT: tbnz x8, #61, .LBB7_127
; VBITS_GE_512-NEXT: .LBB7_64: // %else242
; VBITS_GE_512-NEXT: tbnz x8, #62, .LBB7_128
; VBITS_GE_512-NEXT: .LBB7_65: // %else246
; VBITS_GE_512-NEXT: tbz x8, #63, .LBB7_67
; VBITS_GE_512-NEXT: .LBB7_66: // %cond.load249
; VBITS_GE_512-NEXT: mov w8, #63 // =0x3f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w8
; VBITS_GE_512-NEXT: ldrb w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8
; VBITS_GE_512-NEXT: .LBB7_67: // %else250
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2]
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB7_68: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB7_6
; VBITS_GE_512-NEXT: .LBB7_69: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB7_7
; VBITS_GE_512-NEXT: .LBB7_70: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB7_8
; VBITS_GE_512-NEXT: .LBB7_71: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB7_9
; VBITS_GE_512-NEXT: .LBB7_72: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB7_10
; VBITS_GE_512-NEXT: .LBB7_73: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB7_11
; VBITS_GE_512-NEXT: .LBB7_74: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB7_12
; VBITS_GE_512-NEXT: .LBB7_75: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB7_13
; VBITS_GE_512-NEXT: .LBB7_76: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB7_14
; VBITS_GE_512-NEXT: .LBB7_77: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB7_15
; VBITS_GE_512-NEXT: .LBB7_78: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB7_16
; VBITS_GE_512-NEXT: .LBB7_79: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB7_17
; VBITS_GE_512-NEXT: .LBB7_80: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB7_18
; VBITS_GE_512-NEXT: .LBB7_81: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB7_19
; VBITS_GE_512-NEXT: .LBB7_82: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB7_20
; VBITS_GE_512-NEXT: .LBB7_83: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB7_21
; VBITS_GE_512-NEXT: .LBB7_84: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB7_22
; VBITS_GE_512-NEXT: .LBB7_85: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB7_23
; VBITS_GE_512-NEXT: .LBB7_86: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB7_24
; VBITS_GE_512-NEXT: .LBB7_87: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB7_25
; VBITS_GE_512-NEXT: .LBB7_88: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB7_26
; VBITS_GE_512-NEXT: .LBB7_89: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB7_27
; VBITS_GE_512-NEXT: .LBB7_90: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB7_28
; VBITS_GE_512-NEXT: .LBB7_91: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB7_29
; VBITS_GE_512-NEXT: .LBB7_92: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB7_30
; VBITS_GE_512-NEXT: .LBB7_93: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB7_31
; VBITS_GE_512-NEXT: .LBB7_94: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB7_32
; VBITS_GE_512-NEXT: .LBB7_95: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB7_33
; VBITS_GE_512-NEXT: .LBB7_96: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB7_34
; VBITS_GE_512-NEXT: .LBB7_97: // %cond.load121
; VBITS_GE_512-NEXT: mov w9, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #32, .LBB7_35
; VBITS_GE_512-NEXT: .LBB7_98: // %cond.load125
; VBITS_GE_512-NEXT: mov w9, #32 // =0x20
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #33, .LBB7_36
; VBITS_GE_512-NEXT: .LBB7_99: // %cond.load129
; VBITS_GE_512-NEXT: mov w9, #33 // =0x21
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #34, .LBB7_37
; VBITS_GE_512-NEXT: .LBB7_100: // %cond.load133
; VBITS_GE_512-NEXT: mov w9, #34 // =0x22
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #35, .LBB7_38
; VBITS_GE_512-NEXT: .LBB7_101: // %cond.load137
; VBITS_GE_512-NEXT: mov w9, #35 // =0x23
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #36, .LBB7_39
; VBITS_GE_512-NEXT: .LBB7_102: // %cond.load141
; VBITS_GE_512-NEXT: mov w9, #36 // =0x24
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #37, .LBB7_40
; VBITS_GE_512-NEXT: .LBB7_103: // %cond.load145
; VBITS_GE_512-NEXT: mov w9, #37 // =0x25
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #38, .LBB7_41
; VBITS_GE_512-NEXT: .LBB7_104: // %cond.load149
; VBITS_GE_512-NEXT: mov w9, #38 // =0x26
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #39, .LBB7_42
; VBITS_GE_512-NEXT: .LBB7_105: // %cond.load153
; VBITS_GE_512-NEXT: mov w9, #39 // =0x27
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #40, .LBB7_43
; VBITS_GE_512-NEXT: .LBB7_106: // %cond.load157
; VBITS_GE_512-NEXT: mov w9, #40 // =0x28
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #41, .LBB7_44
; VBITS_GE_512-NEXT: .LBB7_107: // %cond.load161
; VBITS_GE_512-NEXT: mov w9, #41 // =0x29
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #42, .LBB7_45
; VBITS_GE_512-NEXT: .LBB7_108: // %cond.load165
; VBITS_GE_512-NEXT: mov w9, #42 // =0x2a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #43, .LBB7_46
; VBITS_GE_512-NEXT: .LBB7_109: // %cond.load169
; VBITS_GE_512-NEXT: mov w9, #43 // =0x2b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #44, .LBB7_47
; VBITS_GE_512-NEXT: .LBB7_110: // %cond.load173
; VBITS_GE_512-NEXT: mov w9, #44 // =0x2c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #45, .LBB7_48
; VBITS_GE_512-NEXT: .LBB7_111: // %cond.load177
; VBITS_GE_512-NEXT: mov w9, #45 // =0x2d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #46, .LBB7_49
; VBITS_GE_512-NEXT: .LBB7_112: // %cond.load181
; VBITS_GE_512-NEXT: mov w9, #46 // =0x2e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #47, .LBB7_50
; VBITS_GE_512-NEXT: .LBB7_113: // %cond.load185
; VBITS_GE_512-NEXT: mov w9, #47 // =0x2f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #48, .LBB7_51
; VBITS_GE_512-NEXT: .LBB7_114: // %cond.load189
; VBITS_GE_512-NEXT: mov w9, #48 // =0x30
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #49, .LBB7_52
; VBITS_GE_512-NEXT: .LBB7_115: // %cond.load193
; VBITS_GE_512-NEXT: mov w9, #49 // =0x31
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #50, .LBB7_53
; VBITS_GE_512-NEXT: .LBB7_116: // %cond.load197
; VBITS_GE_512-NEXT: mov w9, #50 // =0x32
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #51, .LBB7_54
; VBITS_GE_512-NEXT: .LBB7_117: // %cond.load201
; VBITS_GE_512-NEXT: mov w9, #51 // =0x33
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #52, .LBB7_55
; VBITS_GE_512-NEXT: .LBB7_118: // %cond.load205
; VBITS_GE_512-NEXT: mov w9, #52 // =0x34
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #53, .LBB7_56
; VBITS_GE_512-NEXT: .LBB7_119: // %cond.load209
; VBITS_GE_512-NEXT: mov w9, #53 // =0x35
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #54, .LBB7_57
; VBITS_GE_512-NEXT: .LBB7_120: // %cond.load213
; VBITS_GE_512-NEXT: mov w9, #54 // =0x36
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #55, .LBB7_58
; VBITS_GE_512-NEXT: .LBB7_121: // %cond.load217
; VBITS_GE_512-NEXT: mov w9, #55 // =0x37
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #56, .LBB7_59
; VBITS_GE_512-NEXT: .LBB7_122: // %cond.load221
; VBITS_GE_512-NEXT: mov w9, #56 // =0x38
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #57, .LBB7_60
; VBITS_GE_512-NEXT: .LBB7_123: // %cond.load225
; VBITS_GE_512-NEXT: mov w9, #57 // =0x39
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #58, .LBB7_61
; VBITS_GE_512-NEXT: .LBB7_124: // %cond.load229
; VBITS_GE_512-NEXT: mov w9, #58 // =0x3a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #59, .LBB7_62
; VBITS_GE_512-NEXT: .LBB7_125: // %cond.load233
; VBITS_GE_512-NEXT: mov w9, #59 // =0x3b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #60, .LBB7_63
; VBITS_GE_512-NEXT: .LBB7_126: // %cond.load237
; VBITS_GE_512-NEXT: mov w9, #60 // =0x3c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #61, .LBB7_64
; VBITS_GE_512-NEXT: .LBB7_127: // %cond.load241
; VBITS_GE_512-NEXT: mov w9, #61 // =0x3d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz x8, #62, .LBB7_65
; VBITS_GE_512-NEXT: .LBB7_128: // %cond.load245
; VBITS_GE_512-NEXT: mov w9, #62 // =0x3e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbnz x8, #63, .LBB7_66
; VBITS_GE_512-NEXT: b .LBB7_67
;
; CHECK-EXPAND-LABEL: masked_load_v64i8:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #96
; CHECK-EXPAND-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-EXPAND-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-EXPAND-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-EXPAND-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-EXPAND-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 96
; CHECK-EXPAND-NEXT: .cfi_offset w19, -8
; CHECK-EXPAND-NEXT: .cfi_offset w20, -16
; CHECK-EXPAND-NEXT: .cfi_offset w21, -24
; CHECK-EXPAND-NEXT: .cfi_offset w22, -32
; CHECK-EXPAND-NEXT: .cfi_offset w23, -40
; CHECK-EXPAND-NEXT: .cfi_offset w24, -48
; CHECK-EXPAND-NEXT: .cfi_offset w25, -56
; CHECK-EXPAND-NEXT: .cfi_offset w26, -64
; CHECK-EXPAND-NEXT: .cfi_offset w27, -72
; CHECK-EXPAND-NEXT: .cfi_offset w28, -80
; CHECK-EXPAND-NEXT: ptrue p0.b, vl32
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-EXPAND-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: umov w13, v0.b[1]
; CHECK-EXPAND-NEXT: fmov w7, s0
; CHECK-EXPAND-NEXT: umov w12, v0.b[2]
; CHECK-EXPAND-NEXT: umov w3, v0.b[7]
; CHECK-EXPAND-NEXT: umov w5, v0.b[8]
; CHECK-EXPAND-NEXT: umov w6, v0.b[9]
; CHECK-EXPAND-NEXT: umov w11, v0.b[3]
; CHECK-EXPAND-NEXT: umov w17, v0.b[10]
; CHECK-EXPAND-NEXT: umov w18, v0.b[11]
; CHECK-EXPAND-NEXT: and w7, w7, #0x1
; CHECK-EXPAND-NEXT: mov z6.b, z0.b[18]
; CHECK-EXPAND-NEXT: mov z7.b, z0.b[19]
; CHECK-EXPAND-NEXT: bfi w7, w13, #1, #1
; CHECK-EXPAND-NEXT: umov w10, v0.b[4]
; CHECK-EXPAND-NEXT: mov z16.b, z0.b[20]
; CHECK-EXPAND-NEXT: ubfiz w13, w3, #7, #1
; CHECK-EXPAND-NEXT: ubfiz w3, w5, #8, #1
; CHECK-EXPAND-NEXT: umov w4, v0.b[12]
; CHECK-EXPAND-NEXT: bfi w7, w12, #2, #1
; CHECK-EXPAND-NEXT: mov z17.b, z0.b[21]
; CHECK-EXPAND-NEXT: ubfiz w6, w6, #9, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[5]
; CHECK-EXPAND-NEXT: umov w15, v0.b[13]
; CHECK-EXPAND-NEXT: mov z18.b, z0.b[22]
; CHECK-EXPAND-NEXT: fmov w21, s6
; CHECK-EXPAND-NEXT: fmov w22, s7
; CHECK-EXPAND-NEXT: orr w12, w13, w3
; CHECK-EXPAND-NEXT: ubfiz w17, w17, #10, #1
; CHECK-EXPAND-NEXT: bfi w7, w11, #3, #1
; CHECK-EXPAND-NEXT: mov z19.b, z0.b[23]
; CHECK-EXPAND-NEXT: fmov w23, s16
; CHECK-EXPAND-NEXT: orr w12, w12, w6
; CHECK-EXPAND-NEXT: ubfiz w18, w18, #11, #1
; CHECK-EXPAND-NEXT: mov z20.b, z0.b[24]
; CHECK-EXPAND-NEXT: fmov w24, s17
; CHECK-EXPAND-NEXT: orr w12, w12, w17
; CHECK-EXPAND-NEXT: bfi w7, w10, #4, #1
; CHECK-EXPAND-NEXT: umov w16, v0.b[14]
; CHECK-EXPAND-NEXT: mov z21.b, z0.b[25]
; CHECK-EXPAND-NEXT: fmov w25, s18
; CHECK-EXPAND-NEXT: ubfiz w3, w4, #12, #1
; CHECK-EXPAND-NEXT: orr w10, w12, w18
; CHECK-EXPAND-NEXT: ubfiz w12, w21, #18, #1
; CHECK-EXPAND-NEXT: ubfiz w18, w22, #19, #1
; CHECK-EXPAND-NEXT: umov w14, v0.b[15]
; CHECK-EXPAND-NEXT: mov z22.b, z0.b[26]
; CHECK-EXPAND-NEXT: fmov w26, s19
; CHECK-EXPAND-NEXT: bfi w7, w9, #5, #1
; CHECK-EXPAND-NEXT: ubfiz w9, w15, #13, #1
; CHECK-EXPAND-NEXT: ubfiz w15, w23, #20, #1
; CHECK-EXPAND-NEXT: mov z23.b, z0.b[27]
; CHECK-EXPAND-NEXT: fmov w5, s20
; CHECK-EXPAND-NEXT: orr w10, w10, w3
; CHECK-EXPAND-NEXT: orr w12, w12, w18
; CHECK-EXPAND-NEXT: ubfiz w18, w24, #21, #1
; CHECK-EXPAND-NEXT: umov w8, v0.b[6]
; CHECK-EXPAND-NEXT: fmov w27, s21
; CHECK-EXPAND-NEXT: orr w9, w10, w9
; CHECK-EXPAND-NEXT: orr w10, w12, w15
; CHECK-EXPAND-NEXT: ubfiz w12, w25, #22, #1
; CHECK-EXPAND-NEXT: fmov w28, s22
; CHECK-EXPAND-NEXT: ubfiz w16, w16, #14, #1
; CHECK-EXPAND-NEXT: orr w10, w10, w18
; CHECK-EXPAND-NEXT: ubfiz w15, w26, #23, #1
; CHECK-EXPAND-NEXT: mov z4.b, z0.b[16]
; CHECK-EXPAND-NEXT: mov z24.b, z0.b[28]
; CHECK-EXPAND-NEXT: fmov w13, s23
; CHECK-EXPAND-NEXT: orr w10, w10, w12
; CHECK-EXPAND-NEXT: ubfiz w12, w14, #15, #1
; CHECK-EXPAND-NEXT: ubfiz w14, w5, #24, #1
; CHECK-EXPAND-NEXT: mov z5.b, z0.b[17]
; CHECK-EXPAND-NEXT: mov z3.b, z0.b[29]
; CHECK-EXPAND-NEXT: orr w9, w9, w16
; CHECK-EXPAND-NEXT: orr w10, w10, w15
; CHECK-EXPAND-NEXT: ubfiz w16, w27, #25, #1
; CHECK-EXPAND-NEXT: mov z2.b, z0.b[30]
; CHECK-EXPAND-NEXT: bfi w7, w8, #6, #1
; CHECK-EXPAND-NEXT: orr w8, w9, w12
; CHECK-EXPAND-NEXT: orr w9, w10, w14
; CHECK-EXPAND-NEXT: ubfiz w10, w28, #26, #1
; CHECK-EXPAND-NEXT: fmov w19, s4
; CHECK-EXPAND-NEXT: fmov w17, s24
; CHECK-EXPAND-NEXT: orr w9, w9, w16
; CHECK-EXPAND-NEXT: ubfiz w13, w13, #27, #1
; CHECK-EXPAND-NEXT: fmov w20, s5
; CHECK-EXPAND-NEXT: fmov w12, s3
; CHECK-EXPAND-NEXT: orr w9, w9, w10
; CHECK-EXPAND-NEXT: orr w9, w9, w13
; CHECK-EXPAND-NEXT: fmov w13, s2
; CHECK-EXPAND-NEXT: ubfiz w15, w19, #16, #1
; CHECK-EXPAND-NEXT: ubfiz w14, w17, #28, #1
; CHECK-EXPAND-NEXT: mov z1.b, z0.b[31]
; CHECK-EXPAND-NEXT: mov w11, #32 // =0x20
; CHECK-EXPAND-NEXT: ubfiz w10, w20, #17, #1
; CHECK-EXPAND-NEXT: ubfiz w12, w12, #29, #1
; CHECK-EXPAND-NEXT: orr w8, w8, w15
; CHECK-EXPAND-NEXT: orr w9, w9, w14
; CHECK-EXPAND-NEXT: ubfiz w13, w13, #30, #1
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1, x11]
; CHECK-EXPAND-NEXT: orr w8, w8, w10
; CHECK-EXPAND-NEXT: orr w9, w9, w12
; CHECK-EXPAND-NEXT: ld1b { z3.b }, p0/z, [x0, x11]
; CHECK-EXPAND-NEXT: orr w8, w7, w8
; CHECK-EXPAND-NEXT: orr w9, w9, w13
; CHECK-EXPAND-NEXT: orr w8, w8, w9
; CHECK-EXPAND-NEXT: fmov w9, s1
; CHECK-EXPAND-NEXT: cmpeq p2.b, p0/z, z3.b, z0.b
; CHECK-EXPAND-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-EXPAND-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #31
; CHECK-EXPAND-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-EXPAND-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: fmov w10, s0
; CHECK-EXPAND-NEXT: whilelo p3.b, xzr, x9
; CHECK-EXPAND-NEXT: whilelo p4.b, xzr, x8
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p3/z, [x0, x10]
; CHECK-EXPAND-NEXT: ld1b { z1.b }, p4/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: expand z1.b, p1, z1.b
; CHECK-EXPAND-NEXT: st1b { z0.b }, p0, [x2, x11]
; CHECK-EXPAND-NEXT: st1b { z1.b }, p0, [x2]
; CHECK-EXPAND-NEXT: add sp, sp, #96
; CHECK-EXPAND-NEXT: ret
%a = load <64 x i8>, ptr %ap
%b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %a, %b
%load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison)
store <64 x i8> %load, ptr %c
ret void
}
define void @masked_load_v32i16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b
; VBITS_GE_256-NEXT: umov w8, v1.b[0]
; VBITS_GE_256-NEXT: umov w13, v1.b[1]
; VBITS_GE_256-NEXT: umov w9, v1.b[7]
; VBITS_GE_256-NEXT: umov w11, v0.b[3]
; VBITS_GE_256-NEXT: umov w12, v0.b[4]
; VBITS_GE_256-NEXT: umov w10, v1.b[8]
; VBITS_GE_256-NEXT: umov w16, v1.b[9]
; VBITS_GE_256-NEXT: umov w17, v1.b[10]
; VBITS_GE_256-NEXT: umov w18, v0.b[5]
; VBITS_GE_256-NEXT: umov w14, v1.b[2]
; VBITS_GE_256-NEXT: umov w15, v1.b[3]
; VBITS_GE_256-NEXT: umov w1, v1.b[4]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1
; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1
; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[6]
; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1
; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v1.b[11]
; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1
; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[7]
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[12]
; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[8]
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v1.b[13]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: umov w13, v1.b[14]
; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1
; VBITS_GE_256-NEXT: umov w15, v0.b[9]
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[10]
; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1
; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1
; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1
; VBITS_GE_256-NEXT: umov w10, v1.b[5]
; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1
; VBITS_GE_256-NEXT: orr w11, w11, w14
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[15]
; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[0]
; VBITS_GE_256-NEXT: umov w1, v0.b[11]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v0.b[1]
; VBITS_GE_256-NEXT: orr w11, w11, w15
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: umov w13, v0.b[12]
; VBITS_GE_256-NEXT: umov w14, v1.b[6]
; VBITS_GE_256-NEXT: umov w15, v0.b[2]
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[13]
; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1
; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[14]
; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1
; VBITS_GE_256-NEXT: orr w11, w11, w1
; VBITS_GE_256-NEXT: orr w9, w9, w18
; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1
; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[15]
; VBITS_GE_256-NEXT: orr w9, w9, w14
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w9, w11, w10
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: adrp x9, .LCPI8_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI8_0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x9]
; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB8_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rh { z2.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: add x0, x0, #2
; VBITS_GE_256-NEXT: mov z0.d, z2.d
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB8_3
; VBITS_GE_256-NEXT: b .LBB8_4
; VBITS_GE_256-NEXT: .LBB8_2:
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB8_4
; VBITS_GE_256-NEXT: .LBB8_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: .LBB8_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB8_36
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB8_37
; VBITS_GE_256-NEXT: .LBB8_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB8_38
; VBITS_GE_256-NEXT: .LBB8_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB8_39
; VBITS_GE_256-NEXT: .LBB8_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB8_40
; VBITS_GE_256-NEXT: .LBB8_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB8_41
; VBITS_GE_256-NEXT: .LBB8_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB8_42
; VBITS_GE_256-NEXT: .LBB8_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB8_43
; VBITS_GE_256-NEXT: .LBB8_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB8_44
; VBITS_GE_256-NEXT: .LBB8_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB8_45
; VBITS_GE_256-NEXT: .LBB8_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB8_46
; VBITS_GE_256-NEXT: .LBB8_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB8_47
; VBITS_GE_256-NEXT: .LBB8_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB8_48
; VBITS_GE_256-NEXT: .LBB8_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB8_49
; VBITS_GE_256-NEXT: .LBB8_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB8_50
; VBITS_GE_256-NEXT: .LBB8_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB8_51
; VBITS_GE_256-NEXT: .LBB8_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB8_52
; VBITS_GE_256-NEXT: .LBB8_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB8_53
; VBITS_GE_256-NEXT: .LBB8_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB8_54
; VBITS_GE_256-NEXT: .LBB8_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB8_55
; VBITS_GE_256-NEXT: .LBB8_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB8_56
; VBITS_GE_256-NEXT: .LBB8_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB8_57
; VBITS_GE_256-NEXT: .LBB8_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB8_58
; VBITS_GE_256-NEXT: .LBB8_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB8_59
; VBITS_GE_256-NEXT: .LBB8_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB8_60
; VBITS_GE_256-NEXT: .LBB8_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB8_61
; VBITS_GE_256-NEXT: .LBB8_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB8_62
; VBITS_GE_256-NEXT: .LBB8_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB8_63
; VBITS_GE_256-NEXT: .LBB8_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB8_64
; VBITS_GE_256-NEXT: .LBB8_33: // %else118
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB8_35
; VBITS_GE_256-NEXT: .LBB8_34: // %cond.load121
; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w8
; VBITS_GE_256-NEXT: ldrh w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w8
; VBITS_GE_256-NEXT: .LBB8_35: // %else122
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB8_36: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB8_6
; VBITS_GE_256-NEXT: .LBB8_37: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB8_7
; VBITS_GE_256-NEXT: .LBB8_38: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB8_8
; VBITS_GE_256-NEXT: .LBB8_39: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB8_9
; VBITS_GE_256-NEXT: .LBB8_40: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB8_10
; VBITS_GE_256-NEXT: .LBB8_41: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB8_11
; VBITS_GE_256-NEXT: .LBB8_42: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB8_12
; VBITS_GE_256-NEXT: .LBB8_43: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB8_13
; VBITS_GE_256-NEXT: .LBB8_44: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB8_14
; VBITS_GE_256-NEXT: .LBB8_45: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB8_15
; VBITS_GE_256-NEXT: .LBB8_46: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB8_16
; VBITS_GE_256-NEXT: .LBB8_47: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB8_17
; VBITS_GE_256-NEXT: .LBB8_48: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB8_18
; VBITS_GE_256-NEXT: .LBB8_49: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB8_19
; VBITS_GE_256-NEXT: .LBB8_50: // %cond.load61
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: ptrue p2.h, vl1
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB8_20
; VBITS_GE_256-NEXT: .LBB8_51: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB8_21
; VBITS_GE_256-NEXT: .LBB8_52: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB8_22
; VBITS_GE_256-NEXT: .LBB8_53: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB8_23
; VBITS_GE_256-NEXT: .LBB8_54: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB8_24
; VBITS_GE_256-NEXT: .LBB8_55: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB8_25
; VBITS_GE_256-NEXT: .LBB8_56: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB8_26
; VBITS_GE_256-NEXT: .LBB8_57: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB8_27
; VBITS_GE_256-NEXT: .LBB8_58: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB8_28
; VBITS_GE_256-NEXT: .LBB8_59: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB8_29
; VBITS_GE_256-NEXT: .LBB8_60: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB8_30
; VBITS_GE_256-NEXT: .LBB8_61: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB8_31
; VBITS_GE_256-NEXT: .LBB8_62: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB8_32
; VBITS_GE_256-NEXT: .LBB8_63: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB8_33
; VBITS_GE_256-NEXT: .LBB8_64: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: mov z3.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z1.h, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB8_34
; VBITS_GE_256-NEXT: b .LBB8_35
;
; VBITS_GE_512-LABEL: masked_load_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_512-NEXT: umov w12, v0.b[1]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_512-NEXT: fmov w6, s0
; VBITS_GE_512-NEXT: umov w3, v0.b[7]
; VBITS_GE_512-NEXT: umov w5, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[2]
; VBITS_GE_512-NEXT: umov w4, v0.b[9]
; VBITS_GE_512-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: umov w1, v0.b[10]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_512-NEXT: fmov w22, s7
; VBITS_GE_512-NEXT: and w6, w6, #0x1
; VBITS_GE_512-NEXT: umov w11, v0.b[3]
; VBITS_GE_512-NEXT: umov w16, v0.b[11]
; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[12]
; VBITS_GE_512-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_512-NEXT: fmov w23, s16
; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1
; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1
; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1
; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w24, s17
; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1
; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1
; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1
; VBITS_GE_512-NEXT: umov w17, v0.b[13]
; VBITS_GE_512-NEXT: orr w12, w12, w3
; VBITS_GE_512-NEXT: orr w3, w5, w20
; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w25, s18
; VBITS_GE_512-NEXT: orr w12, w12, w13
; VBITS_GE_512-NEXT: orr w13, w3, w4
; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1
; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w26, s19
; VBITS_GE_512-NEXT: orr w11, w12, w1
; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1
; VBITS_GE_512-NEXT: ubfiz w16, w18, #12, #1
; VBITS_GE_512-NEXT: ubfiz w18, w24, #22, #1
; VBITS_GE_512-NEXT: umov w15, v0.b[15]
; VBITS_GE_512-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_512-NEXT: orr w13, w13, w3
; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_512-NEXT: fmov w27, s20
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: ubfiz w10, w17, #13, #1
; VBITS_GE_512-NEXT: orr w12, w13, w18
; VBITS_GE_512-NEXT: ubfiz w13, w25, #23, #1
; VBITS_GE_512-NEXT: fmov w28, s21
; VBITS_GE_512-NEXT: orr w11, w11, w16
; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_512-NEXT: ubfiz w9, w26, #24, #1
; VBITS_GE_512-NEXT: umov w8, v0.b[6]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w29, s22
; VBITS_GE_512-NEXT: orr w10, w11, w10
; VBITS_GE_512-NEXT: orr w11, w12, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: fmov w7, s3
; VBITS_GE_512-NEXT: fmov w30, s23
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: orr w10, w10, w11
; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1
; VBITS_GE_512-NEXT: str w8, [sp, #8] // 4-byte Spill
; VBITS_GE_512-NEXT: fmov w19, s4
; VBITS_GE_512-NEXT: fmov w8, s24
; VBITS_GE_512-NEXT: orr w9, w9, w14
; VBITS_GE_512-NEXT: orr w9, w9, w11
; VBITS_GE_512-NEXT: fmov w11, s2
; VBITS_GE_512-NEXT: ldr w15, [sp, #8] // 4-byte Reload
; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1
; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1
; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1
; VBITS_GE_512-NEXT: bfi w6, w15, #6, #1
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: orr w9, w9, w14
; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1
; VBITS_GE_512-NEXT: orr w10, w10, w13
; VBITS_GE_512-NEXT: orr w8, w9, w8
; VBITS_GE_512-NEXT: orr w9, w6, w10
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: orr w8, w9, w8
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB8_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #2
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB8_3
; VBITS_GE_512-NEXT: b .LBB8_4
; VBITS_GE_512-NEXT: .LBB8_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI8_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI8_0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB8_4
; VBITS_GE_512-NEXT: .LBB8_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: .LBB8_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB8_36
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB8_37
; VBITS_GE_512-NEXT: .LBB8_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB8_38
; VBITS_GE_512-NEXT: .LBB8_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB8_39
; VBITS_GE_512-NEXT: .LBB8_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB8_40
; VBITS_GE_512-NEXT: .LBB8_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB8_41
; VBITS_GE_512-NEXT: .LBB8_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB8_42
; VBITS_GE_512-NEXT: .LBB8_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB8_43
; VBITS_GE_512-NEXT: .LBB8_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB8_44
; VBITS_GE_512-NEXT: .LBB8_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB8_45
; VBITS_GE_512-NEXT: .LBB8_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB8_46
; VBITS_GE_512-NEXT: .LBB8_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB8_47
; VBITS_GE_512-NEXT: .LBB8_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB8_48
; VBITS_GE_512-NEXT: .LBB8_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB8_49
; VBITS_GE_512-NEXT: .LBB8_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB8_50
; VBITS_GE_512-NEXT: .LBB8_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB8_51
; VBITS_GE_512-NEXT: .LBB8_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB8_52
; VBITS_GE_512-NEXT: .LBB8_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB8_53
; VBITS_GE_512-NEXT: .LBB8_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB8_54
; VBITS_GE_512-NEXT: .LBB8_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB8_55
; VBITS_GE_512-NEXT: .LBB8_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB8_56
; VBITS_GE_512-NEXT: .LBB8_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB8_57
; VBITS_GE_512-NEXT: .LBB8_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB8_58
; VBITS_GE_512-NEXT: .LBB8_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB8_59
; VBITS_GE_512-NEXT: .LBB8_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB8_60
; VBITS_GE_512-NEXT: .LBB8_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB8_61
; VBITS_GE_512-NEXT: .LBB8_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB8_62
; VBITS_GE_512-NEXT: .LBB8_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB8_63
; VBITS_GE_512-NEXT: .LBB8_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB8_64
; VBITS_GE_512-NEXT: .LBB8_33: // %else118
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB8_35
; VBITS_GE_512-NEXT: .LBB8_34: // %cond.load121
; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w8
; VBITS_GE_512-NEXT: ldrh w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8
; VBITS_GE_512-NEXT: .LBB8_35: // %else122
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB8_36: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB8_6
; VBITS_GE_512-NEXT: .LBB8_37: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB8_7
; VBITS_GE_512-NEXT: .LBB8_38: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB8_8
; VBITS_GE_512-NEXT: .LBB8_39: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB8_9
; VBITS_GE_512-NEXT: .LBB8_40: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB8_10
; VBITS_GE_512-NEXT: .LBB8_41: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB8_11
; VBITS_GE_512-NEXT: .LBB8_42: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB8_12
; VBITS_GE_512-NEXT: .LBB8_43: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB8_13
; VBITS_GE_512-NEXT: .LBB8_44: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB8_14
; VBITS_GE_512-NEXT: .LBB8_45: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB8_15
; VBITS_GE_512-NEXT: .LBB8_46: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB8_16
; VBITS_GE_512-NEXT: .LBB8_47: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB8_17
; VBITS_GE_512-NEXT: .LBB8_48: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB8_18
; VBITS_GE_512-NEXT: .LBB8_49: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB8_19
; VBITS_GE_512-NEXT: .LBB8_50: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB8_20
; VBITS_GE_512-NEXT: .LBB8_51: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB8_21
; VBITS_GE_512-NEXT: .LBB8_52: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB8_22
; VBITS_GE_512-NEXT: .LBB8_53: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB8_23
; VBITS_GE_512-NEXT: .LBB8_54: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB8_24
; VBITS_GE_512-NEXT: .LBB8_55: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB8_25
; VBITS_GE_512-NEXT: .LBB8_56: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB8_26
; VBITS_GE_512-NEXT: .LBB8_57: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB8_27
; VBITS_GE_512-NEXT: .LBB8_58: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB8_28
; VBITS_GE_512-NEXT: .LBB8_59: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB8_29
; VBITS_GE_512-NEXT: .LBB8_60: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB8_30
; VBITS_GE_512-NEXT: .LBB8_61: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB8_31
; VBITS_GE_512-NEXT: .LBB8_62: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB8_32
; VBITS_GE_512-NEXT: .LBB8_63: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB8_33
; VBITS_GE_512-NEXT: .LBB8_64: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB8_34
; VBITS_GE_512-NEXT: b .LBB8_35
;
; CHECK-EXPAND-LABEL: masked_load_v32i16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: umov w8, v0.b[0]
; CHECK-EXPAND-NEXT: umov w9, v0.b[1]
; CHECK-EXPAND-NEXT: umov w10, v0.b[7]
; CHECK-EXPAND-NEXT: umov w11, v0.b[8]
; CHECK-EXPAND-NEXT: umov w12, v0.b[2]
; CHECK-EXPAND-NEXT: umov w13, v0.b[9]
; CHECK-EXPAND-NEXT: umov w14, v0.b[10]
; CHECK-EXPAND-NEXT: umov w15, v0.b[3]
; CHECK-EXPAND-NEXT: umov w16, v0.b[4]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: ubfiz w10, w10, #7, #1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[11]
; CHECK-EXPAND-NEXT: ubfiz w11, w11, #8, #1
; CHECK-EXPAND-NEXT: ubfiz w13, w13, #9, #1
; CHECK-EXPAND-NEXT: ubfiz w14, w14, #10, #1
; CHECK-EXPAND-NEXT: bfi w8, w12, #2, #1
; CHECK-EXPAND-NEXT: orr w10, w10, w11
; CHECK-EXPAND-NEXT: umov w11, v0.b[12]
; CHECK-EXPAND-NEXT: umov w12, v0.b[5]
; CHECK-EXPAND-NEXT: orr w10, w10, w13
; CHECK-EXPAND-NEXT: umov w13, v0.b[13]
; CHECK-EXPAND-NEXT: bfi w8, w15, #3, #1
; CHECK-EXPAND-NEXT: umov w15, v0.b[14]
; CHECK-EXPAND-NEXT: ubfiz w9, w9, #11, #1
; CHECK-EXPAND-NEXT: orr w10, w10, w14
; CHECK-EXPAND-NEXT: mov x14, #16 // =0x10
; CHECK-EXPAND-NEXT: bfi w8, w16, #4, #1
; CHECK-EXPAND-NEXT: umov w16, v0.b[6]
; CHECK-EXPAND-NEXT: orr w9, w10, w9
; CHECK-EXPAND-NEXT: ubfiz w10, w11, #12, #1
; CHECK-EXPAND-NEXT: ubfiz w11, w13, #13, #1
; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1, x14, lsl #1]
; CHECK-EXPAND-NEXT: bfi w8, w12, #5, #1
; CHECK-EXPAND-NEXT: ubfiz w12, w15, #14, #1
; CHECK-EXPAND-NEXT: ld1h { z2.h }, p0/z, [x0, x14, lsl #1]
; CHECK-EXPAND-NEXT: orr w9, w9, w10
; CHECK-EXPAND-NEXT: umov w10, v0.b[15]
; CHECK-EXPAND-NEXT: orr w9, w9, w11
; CHECK-EXPAND-NEXT: bfi w8, w16, #6, #1
; CHECK-EXPAND-NEXT: orr w9, w9, w12
; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z2.h, z1.h
; CHECK-EXPAND-NEXT: orr w8, w8, w9
; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #15
; CHECK-EXPAND-NEXT: cntp x10, p1, p1.h
; CHECK-EXPAND-NEXT: and w8, w8, #0xffff
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: whilelo p4.h, xzr, x10
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.h, xzr, x9
; CHECK-EXPAND-NEXT: ld1h { z1.h }, p4/z, [x0]
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: expand z1.h, p1, z1.h
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p3/z, [x0, x8, lsl #1]
; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2, x14, lsl #1]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
%a = load <32 x i16>, ptr %ap
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %a, %b
%load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison)
store <32 x i16> %load, ptr %c
ret void
}
define void @masked_load_v16i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI9_0
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
; VBITS_GE_256-NEXT: adrp x8, .LCPI9_1
; VBITS_GE_256-NEXT: add x8, x8, :lo12:.LCPI9_1
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h1, v0.8h
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x8]
; VBITS_GE_256-NEXT: fmov w9, s1
; VBITS_GE_256-NEXT: fmov w8, s1
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB9_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z2.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: mov z0.d, z2.d
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB9_3
; VBITS_GE_256-NEXT: b .LBB9_4
; VBITS_GE_256-NEXT: .LBB9_2:
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB9_4
; VBITS_GE_256-NEXT: .LBB9_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: .LBB9_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB9_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB9_21
; VBITS_GE_256-NEXT: .LBB9_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB9_22
; VBITS_GE_256-NEXT: .LBB9_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB9_23
; VBITS_GE_256-NEXT: .LBB9_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB9_24
; VBITS_GE_256-NEXT: .LBB9_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB9_25
; VBITS_GE_256-NEXT: .LBB9_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB9_26
; VBITS_GE_256-NEXT: .LBB9_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB9_27
; VBITS_GE_256-NEXT: .LBB9_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB9_28
; VBITS_GE_256-NEXT: .LBB9_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB9_29
; VBITS_GE_256-NEXT: .LBB9_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB9_30
; VBITS_GE_256-NEXT: .LBB9_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB9_31
; VBITS_GE_256-NEXT: .LBB9_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB9_32
; VBITS_GE_256-NEXT: .LBB9_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB9_19
; VBITS_GE_256-NEXT: .LBB9_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w8
; VBITS_GE_256-NEXT: .LBB9_19: // %else58
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB9_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB9_6
; VBITS_GE_256-NEXT: .LBB9_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB9_7
; VBITS_GE_256-NEXT: .LBB9_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB9_8
; VBITS_GE_256-NEXT: .LBB9_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB9_9
; VBITS_GE_256-NEXT: .LBB9_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB9_10
; VBITS_GE_256-NEXT: .LBB9_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB9_11
; VBITS_GE_256-NEXT: .LBB9_26: // %cond.load29
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: ptrue p2.s, vl1
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB9_12
; VBITS_GE_256-NEXT: .LBB9_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB9_13
; VBITS_GE_256-NEXT: .LBB9_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB9_14
; VBITS_GE_256-NEXT: .LBB9_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB9_15
; VBITS_GE_256-NEXT: .LBB9_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB9_16
; VBITS_GE_256-NEXT: .LBB9_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB9_17
; VBITS_GE_256-NEXT: .LBB9_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: mov z3.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z1.s, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB9_18
; VBITS_GE_256-NEXT: b .LBB9_19
;
; VBITS_GE_512-LABEL: masked_load_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB9_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB9_3
; VBITS_GE_512-NEXT: b .LBB9_4
; VBITS_GE_512-NEXT: .LBB9_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI9_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI9_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB9_4
; VBITS_GE_512-NEXT: .LBB9_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: .LBB9_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB9_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB9_21
; VBITS_GE_512-NEXT: .LBB9_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB9_22
; VBITS_GE_512-NEXT: .LBB9_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB9_23
; VBITS_GE_512-NEXT: .LBB9_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB9_24
; VBITS_GE_512-NEXT: .LBB9_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB9_25
; VBITS_GE_512-NEXT: .LBB9_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB9_26
; VBITS_GE_512-NEXT: .LBB9_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB9_27
; VBITS_GE_512-NEXT: .LBB9_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB9_28
; VBITS_GE_512-NEXT: .LBB9_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB9_29
; VBITS_GE_512-NEXT: .LBB9_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB9_30
; VBITS_GE_512-NEXT: .LBB9_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB9_31
; VBITS_GE_512-NEXT: .LBB9_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB9_32
; VBITS_GE_512-NEXT: .LBB9_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB9_19
; VBITS_GE_512-NEXT: .LBB9_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8
; VBITS_GE_512-NEXT: .LBB9_19: // %else58
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB9_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB9_6
; VBITS_GE_512-NEXT: .LBB9_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB9_7
; VBITS_GE_512-NEXT: .LBB9_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB9_8
; VBITS_GE_512-NEXT: .LBB9_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB9_9
; VBITS_GE_512-NEXT: .LBB9_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB9_10
; VBITS_GE_512-NEXT: .LBB9_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB9_11
; VBITS_GE_512-NEXT: .LBB9_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB9_12
; VBITS_GE_512-NEXT: .LBB9_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB9_13
; VBITS_GE_512-NEXT: .LBB9_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB9_14
; VBITS_GE_512-NEXT: .LBB9_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB9_15
; VBITS_GE_512-NEXT: .LBB9_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB9_16
; VBITS_GE_512-NEXT: .LBB9_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB9_17
; VBITS_GE_512-NEXT: .LBB9_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB9_18
; VBITS_GE_512-NEXT: b .LBB9_19
;
; CHECK-EXPAND-LABEL: masked_load_v16i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: umov w8, v0.b[0]
; CHECK-EXPAND-NEXT: umov w9, v0.b[1]
; CHECK-EXPAND-NEXT: umov w10, v0.b[2]
; CHECK-EXPAND-NEXT: umov w11, v0.b[3]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[4]
; CHECK-EXPAND-NEXT: bfi w8, w10, #2, #1
; CHECK-EXPAND-NEXT: umov w10, v0.b[5]
; CHECK-EXPAND-NEXT: bfi w8, w11, #3, #1
; CHECK-EXPAND-NEXT: mov x11, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1, x11, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
; CHECK-EXPAND-NEXT: bfi w8, w9, #4, #1
; CHECK-EXPAND-NEXT: umov w9, v0.b[6]
; CHECK-EXPAND-NEXT: bfi w8, w10, #5, #1
; CHECK-EXPAND-NEXT: umov w10, v0.b[7]
; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z2.s, z1.s
; CHECK-EXPAND-NEXT: bfi w8, w9, #6, #1
; CHECK-EXPAND-NEXT: orr w8, w8, w10, lsl #7
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s
; CHECK-EXPAND-NEXT: cntp x10, p1, p1.s
; CHECK-EXPAND-NEXT: and w8, w8, #0xff
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: whilelo p4.s, xzr, x10
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.s, xzr, x9
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p4/z, [x0]
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p3/z, [x0, x8, lsl #2]
; CHECK-EXPAND-NEXT: expand z1.s, p1, z1.s
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2, x11, lsl #2]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
%a = load <16 x i32>, ptr %ap
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
%load = call <16 x i32> @llvm.masked.expandload.v16i32(ptr %ap, <16 x i1> %mask, <16 x i32> poison)
store <16 x i32> %load, ptr %c
ret void
}
; Expand-load of <8 x i64> with a poison passthru.
;
; The mask is (load %ap) == (load %bp). The expandload then reads from %ap
; under that mask and the result is stored to %c. Despite the masked_load_
; name, the intrinsic exercised here is llvm.masked.expandload.
;
; The assertions below are autogenerated (see the first line of this file), so
; regenerate them with the update script rather than editing them by hand.
; What the expected code looks like for each check prefix
;  * VBITS_GE_256 - the result occupies two vectors, z0 for lanes 0-3 and z1
;    for lanes 4-7. Every mask bit is tested with tbz/tbnz, and a set bit
;    branches to a cond.load block that reads one element through x0 and
;    inserts it into the matching lane.
;  * VBITS_GE_512 - the same per-lane branch structure, but a single vector
;    (z0) holds all eight lanes.
;  * CHECK-EXPAND (the sve2p2 RUN line) - no per-lane branches. Each 4-lane
;    half is loaded under a whilelo-generated predicate and then spread into
;    place with the expand instruction.
define void @masked_load_v8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: adrp x8, .LCPI10_0
; VBITS_GE_256-NEXT: add x8, x8, :lo12:.LCPI10_0
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x8]
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB10_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rd { z2.d }, p1/z, [x0]
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: add x0, x0, #8
; VBITS_GE_256-NEXT: mov z0.d, z2.d
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB10_3
; VBITS_GE_256-NEXT: b .LBB10_4
; VBITS_GE_256-NEXT: .LBB10_2:
; VBITS_GE_256-NEXT: mov z1.d, z0.d
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB10_4
; VBITS_GE_256-NEXT: .LBB10_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_256-NEXT: .LBB10_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB10_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB10_13
; VBITS_GE_256-NEXT: .LBB10_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB10_14
; VBITS_GE_256-NEXT: .LBB10_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB10_15
; VBITS_GE_256-NEXT: .LBB10_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB10_16
; VBITS_GE_256-NEXT: .LBB10_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB10_11
; VBITS_GE_256-NEXT: .LBB10_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x8
; VBITS_GE_256-NEXT: ldr x8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x8
; VBITS_GE_256-NEXT: .LBB10_11: // %else26
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB10_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB10_6
; VBITS_GE_256-NEXT: .LBB10_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB10_7
; VBITS_GE_256-NEXT: .LBB10_14: // %cond.load13
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: ptrue p2.d, vl1
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB10_8
; VBITS_GE_256-NEXT: .LBB10_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB10_9
; VBITS_GE_256-NEXT: .LBB10_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB10_10
; VBITS_GE_256-NEXT: b .LBB10_11
;
; VBITS_GE_512-LABEL: masked_load_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB10_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rd { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #8
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB10_3
; VBITS_GE_512-NEXT: b .LBB10_4
; VBITS_GE_512-NEXT: .LBB10_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI10_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI10_0
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB10_4
; VBITS_GE_512-NEXT: .LBB10_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: .LBB10_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB10_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB10_13
; VBITS_GE_512-NEXT: .LBB10_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB10_14
; VBITS_GE_512-NEXT: .LBB10_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB10_15
; VBITS_GE_512-NEXT: .LBB10_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB10_16
; VBITS_GE_512-NEXT: .LBB10_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB10_11
; VBITS_GE_512-NEXT: .LBB10_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x8
; VBITS_GE_512-NEXT: ldr x8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x8
; VBITS_GE_512-NEXT: .LBB10_11: // %else26
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB10_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB10_6
; VBITS_GE_512-NEXT: .LBB10_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB10_7
; VBITS_GE_512-NEXT: .LBB10_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB10_8
; VBITS_GE_512-NEXT: .LBB10_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB10_9
; VBITS_GE_512-NEXT: .LBB10_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB10_10
; VBITS_GE_512-NEXT: b .LBB10_11
;
; CHECK-EXPAND-LABEL: masked_load_v8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x0, x10, lsl #3]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1, x10, lsl #3]
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z2.d, z1.d
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: umov w8, v0.h[0]
; CHECK-EXPAND-NEXT: umov w9, v0.h[1]
; CHECK-EXPAND-NEXT: umov w11, v0.h[2]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.h[3]
; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1
; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d
; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d
; CHECK-EXPAND-NEXT: and w8, w8, #0xf
; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p4/z, [x0]
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: expand z1.d, p1, z1.d
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%load = call <8 x i64> @llvm.masked.expandload.v8i64(ptr %ap, <8 x i1> %mask, <8 x i64> poison)
store <8 x i64> %load, ptr %c
ret void
}
; Expand-load of <8 x i64> with a real passthru operand.
;
; The mask is (load %ap) == (load %bp), the expandload reads from %ap under
; that mask, and the result is stored to %c. The passthru operand is %b, the
; vector loaded from %bp, rather than poison.
;
; The assertions below are autogenerated (see the first line of this file), so
; regenerate them with the update script rather than editing them by hand.
; What the expected code looks like for each check prefix
;  * VBITS_GE_256 - the registers loaded from x1 (z0 for lanes 0-3, z1 for
;    lanes 4-7) double as the result. Every mask bit is tested with tbz/tbnz,
;    and each cond.load block merges one loaded element into them with a
;    predicated (p/m) mov, so the remaining lanes keep the data from %bp.
;  * VBITS_GE_512 - the same per-lane branch structure, with all eight lanes
;    in z0.
;  * CHECK-EXPAND (the sve2p2 RUN line) - no per-lane branches. Each 4-lane
;    half is loaded under a whilelo-generated predicate, spread with the
;    expand instruction, then blended with the passthru half (a merging mov
;    for the low half, sel for the high half).
define void @masked_load_passthru_v8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_passthru_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z2.d, z1.d
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z3.d, z0.d
; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s
; VBITS_GE_256-NEXT: ptrue p1.d, vl1
; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h
; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b
; VBITS_GE_256-NEXT: umov w8, v2.b[0]
; VBITS_GE_256-NEXT: umov w9, v2.b[1]
; VBITS_GE_256-NEXT: umov w10, v2.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v2.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v2.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbnz w9, #0, .LBB11_10
; VBITS_GE_256-NEXT: // %bb.1: // %else
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB11_11
; VBITS_GE_256-NEXT: .LBB11_2: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB11_12
; VBITS_GE_256-NEXT: .LBB11_3: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB11_13
; VBITS_GE_256-NEXT: .LBB11_4: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB11_14
; VBITS_GE_256-NEXT: .LBB11_5: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB11_15
; VBITS_GE_256-NEXT: .LBB11_6: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB11_16
; VBITS_GE_256-NEXT: .LBB11_7: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB11_9
; VBITS_GE_256-NEXT: .LBB11_8: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x8
; VBITS_GE_256-NEXT: ldr x8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x8
; VBITS_GE_256-NEXT: .LBB11_9: // %else26
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB11_10: // %cond.load
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: mov z0.d, p1/m, x9
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB11_2
; VBITS_GE_256-NEXT: .LBB11_11: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p2.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9
; VBITS_GE_256-NEXT: tbz w8, #2, .LBB11_3
; VBITS_GE_256-NEXT: .LBB11_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p2.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB11_4
; VBITS_GE_256-NEXT: .LBB11_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p2.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p3.d, p2/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z0.d, p3/m, x9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB11_5
; VBITS_GE_256-NEXT: .LBB11_14: // %cond.load13
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: mov z1.d, p1/m, x9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB11_6
; VBITS_GE_256-NEXT: .LBB11_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB11_7
; VBITS_GE_256-NEXT: .LBB11_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: ldr x9, [x0], #8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z1.d, p2/m, x9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB11_8
; VBITS_GE_256-NEXT: b .LBB11_9
;
; VBITS_GE_512-LABEL: masked_load_passthru_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z1.d, z0.d
; VBITS_GE_512-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_512-NEXT: umov w8, v1.b[0]
; VBITS_GE_512-NEXT: umov w9, v1.b[1]
; VBITS_GE_512-NEXT: umov w10, v1.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v1.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v1.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbnz w9, #0, .LBB11_10
; VBITS_GE_512-NEXT: // %bb.1: // %else
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB11_11
; VBITS_GE_512-NEXT: .LBB11_2: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB11_12
; VBITS_GE_512-NEXT: .LBB11_3: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB11_13
; VBITS_GE_512-NEXT: .LBB11_4: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB11_14
; VBITS_GE_512-NEXT: .LBB11_5: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB11_15
; VBITS_GE_512-NEXT: .LBB11_6: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB11_16
; VBITS_GE_512-NEXT: .LBB11_7: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB11_9
; VBITS_GE_512-NEXT: .LBB11_8: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
; VBITS_GE_512-NEXT: ldr x8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x8
; VBITS_GE_512-NEXT: .LBB11_9: // %else26
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB11_10: // %cond.load
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: ptrue p1.d, vl1
; VBITS_GE_512-NEXT: mov z0.d, p1/m, x9
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB11_2
; VBITS_GE_512-NEXT: .LBB11_11: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #2, .LBB11_3
; VBITS_GE_512-NEXT: .LBB11_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB11_4
; VBITS_GE_512-NEXT: .LBB11_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB11_5
; VBITS_GE_512-NEXT: .LBB11_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB11_6
; VBITS_GE_512-NEXT: .LBB11_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB11_7
; VBITS_GE_512-NEXT: .LBB11_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: ldr x9, [x0], #8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: mov z0.d, p2/m, x9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB11_8
; VBITS_GE_512-NEXT: b .LBB11_9
;
; CHECK-EXPAND-LABEL: masked_load_passthru_v8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x1, x10, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z3.d, z2.d
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: umov w8, v0.h[0]
; CHECK-EXPAND-NEXT: umov w9, v0.h[1]
; CHECK-EXPAND-NEXT: umov w11, v0.h[2]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.h[3]
; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1
; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d
; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d
; CHECK-EXPAND-NEXT: and w8, w8, #0xf
; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: ld1d { z3.d }, p4/z, [x0]
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: expand z3.d, p1, z3.d
; CHECK-EXPAND-NEXT: mov z1.d, p1/m, z3.d
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d
; CHECK-EXPAND-NEXT: sel z0.d, p2, z0.d, z2.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%load = call <8 x i64> @llvm.masked.expandload.v8i64(ptr %ap, <8 x i1> %mask, <8 x i64> %b)
store <8 x i64> %load, ptr %c
ret void
}
define void @masked_load_passthru_v8f64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_passthru_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z2.d, z1.d
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z3.d, z0.d
; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s
; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h
; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b
; VBITS_GE_256-NEXT: umov w8, v2.b[0]
; VBITS_GE_256-NEXT: umov w9, v2.b[1]
; VBITS_GE_256-NEXT: umov w10, v2.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v2.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v2.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v2.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbnz w9, #0, .LBB12_10
; VBITS_GE_256-NEXT: // %bb.1: // %else
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB12_11
; VBITS_GE_256-NEXT: .LBB12_2: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB12_12
; VBITS_GE_256-NEXT: .LBB12_3: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB12_13
; VBITS_GE_256-NEXT: .LBB12_4: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB12_14
; VBITS_GE_256-NEXT: .LBB12_5: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB12_15
; VBITS_GE_256-NEXT: .LBB12_6: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB12_16
; VBITS_GE_256-NEXT: .LBB12_7: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB12_9
; VBITS_GE_256-NEXT: .LBB12_8: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x8
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0]
; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2
; VBITS_GE_256-NEXT: .LBB12_9: // %else26
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB12_10: // %cond.load
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: ptrue p1.d, vl1
; VBITS_GE_256-NEXT: mov z0.d, p1/m, z2.d
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB12_2
; VBITS_GE_256-NEXT: .LBB12_11: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2
; VBITS_GE_256-NEXT: tbz w8, #2, .LBB12_3
; VBITS_GE_256-NEXT: .LBB12_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB12_4
; VBITS_GE_256-NEXT: .LBB12_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: mov z0.d, p2/m, d2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB12_5
; VBITS_GE_256-NEXT: .LBB12_14: // %cond.load13
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: ptrue p1.d, vl1
; VBITS_GE_256-NEXT: mov z1.d, p1/m, z2.d
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB12_6
; VBITS_GE_256-NEXT: .LBB12_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB12_7
; VBITS_GE_256-NEXT: .LBB12_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z3.d, x9
; VBITS_GE_256-NEXT: cmpeq p2.d, p1/z, z2.d, z3.d
; VBITS_GE_256-NEXT: ldr d2, [x0], #8
; VBITS_GE_256-NEXT: mov z1.d, p2/m, d2
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB12_8
; VBITS_GE_256-NEXT: b .LBB12_9
;
; VBITS_GE_512-LABEL: masked_load_passthru_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z1.d, z0.d
; VBITS_GE_512-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_512-NEXT: umov w8, v1.b[0]
; VBITS_GE_512-NEXT: umov w9, v1.b[1]
; VBITS_GE_512-NEXT: umov w10, v1.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v1.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v1.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v1.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbnz w9, #0, .LBB12_10
; VBITS_GE_512-NEXT: // %bb.1: // %else
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB12_11
; VBITS_GE_512-NEXT: .LBB12_2: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB12_12
; VBITS_GE_512-NEXT: .LBB12_3: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB12_13
; VBITS_GE_512-NEXT: .LBB12_4: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB12_14
; VBITS_GE_512-NEXT: .LBB12_5: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB12_15
; VBITS_GE_512-NEXT: .LBB12_6: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB12_16
; VBITS_GE_512-NEXT: .LBB12_7: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB12_9
; VBITS_GE_512-NEXT: .LBB12_8: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0]
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: .LBB12_9: // %else26
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB12_10: // %cond.load
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: ptrue p1.d, vl1
; VBITS_GE_512-NEXT: mov z0.d, p1/m, z1.d
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB12_2
; VBITS_GE_512-NEXT: .LBB12_11: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbz w8, #2, .LBB12_3
; VBITS_GE_512-NEXT: .LBB12_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB12_4
; VBITS_GE_512-NEXT: .LBB12_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB12_5
; VBITS_GE_512-NEXT: .LBB12_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB12_6
; VBITS_GE_512-NEXT: .LBB12_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB12_7
; VBITS_GE_512-NEXT: .LBB12_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x9
; VBITS_GE_512-NEXT: cmpeq p2.d, p1/z, z1.d, z2.d
; VBITS_GE_512-NEXT: ldr d1, [x0], #8
; VBITS_GE_512-NEXT: mov z0.d, p2/m, d1
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB12_8
; VBITS_GE_512-NEXT: b .LBB12_9
;
; CHECK-EXPAND-LABEL: masked_load_passthru_v8f64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: sub sp, sp, #16
; CHECK-EXPAND-NEXT: .cfi_def_cfa_offset 16
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x10, #4 // =0x4
; CHECK-EXPAND-NEXT: ptrue p3.s
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: ld1d { z2.d }, p0/z, [x1, x10, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z3.d }, p0/z, [x0, x10, lsl #3]
; CHECK-EXPAND-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-EXPAND-NEXT: fcmeq p2.d, p0/z, z3.d, z2.d
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: umov w8, v0.h[0]
; CHECK-EXPAND-NEXT: umov w9, v0.h[1]
; CHECK-EXPAND-NEXT: umov w11, v0.h[2]
; CHECK-EXPAND-NEXT: and w8, w8, #0x1
; CHECK-EXPAND-NEXT: bfi w8, w9, #1, #1
; CHECK-EXPAND-NEXT: umov w9, v0.h[3]
; CHECK-EXPAND-NEXT: bfi w8, w11, #2, #1
; CHECK-EXPAND-NEXT: cntp x11, p1, p1.d
; CHECK-EXPAND-NEXT: orr w8, w8, w9, lsl #3
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.d
; CHECK-EXPAND-NEXT: and w8, w8, #0xf
; CHECK-EXPAND-NEXT: whilelo p4.d, xzr, x11
; CHECK-EXPAND-NEXT: fmov s0, w8
; CHECK-EXPAND-NEXT: ld1d { z3.d }, p4/z, [x0]
; CHECK-EXPAND-NEXT: cnt z0.s, p3/z, z0.s
; CHECK-EXPAND-NEXT: whilelo p3.d, xzr, x9
; CHECK-EXPAND-NEXT: fmov w8, s0
; CHECK-EXPAND-NEXT: expand z3.d, p1, z3.d
; CHECK-EXPAND-NEXT: mov z1.d, p1/m, z3.d
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p3/z, [x0, x8, lsl #3]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2]
; CHECK-EXPAND-NEXT: expand z0.d, p2, z0.d
; CHECK-EXPAND-NEXT: sel z0.d, p2, z0.d, z2.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2, x10, lsl #3]
; CHECK-EXPAND-NEXT: add sp, sp, #16
; CHECK-EXPAND-NEXT: ret
%a = load <8 x double>, ptr %ap
%b = load <8 x double>, ptr %bp
%mask = fcmp oeq <8 x double> %a, %b
%load = call <8 x double> @llvm.masked.expandload.v8f64(ptr %ap, <8 x i1> %mask, <8 x double> %b)
store <8 x double> %load, ptr %c
ret void
}
define void @masked_load_sext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #112
; VBITS_GE_256-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_256-NEXT: .cfi_offset w19, -8
; VBITS_GE_256-NEXT: .cfi_offset w20, -16
; VBITS_GE_256-NEXT: .cfi_offset w21, -24
; VBITS_GE_256-NEXT: .cfi_offset w22, -32
; VBITS_GE_256-NEXT: .cfi_offset w23, -40
; VBITS_GE_256-NEXT: .cfi_offset w24, -48
; VBITS_GE_256-NEXT: .cfi_offset w25, -56
; VBITS_GE_256-NEXT: .cfi_offset w26, -64
; VBITS_GE_256-NEXT: .cfi_offset w27, -72
; VBITS_GE_256-NEXT: .cfi_offset w28, -80
; VBITS_GE_256-NEXT: .cfi_offset w30, -88
; VBITS_GE_256-NEXT: .cfi_offset w29, -96
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; VBITS_GE_256-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.b
; VBITS_GE_256-NEXT: umov w13, v0.b[1]
; VBITS_GE_256-NEXT: fmov w6, s0
; VBITS_GE_256-NEXT: umov w4, v0.b[7]
; VBITS_GE_256-NEXT: umov w5, v0.b[8]
; VBITS_GE_256-NEXT: umov w12, v0.b[2]
; VBITS_GE_256-NEXT: umov w3, v0.b[9]
; VBITS_GE_256-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_256-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_256-NEXT: umov w11, v0.b[3]
; VBITS_GE_256-NEXT: and w6, w6, #0x1
; VBITS_GE_256-NEXT: umov w1, v0.b[10]
; VBITS_GE_256-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_256-NEXT: bfi w6, w13, #1, #1
; VBITS_GE_256-NEXT: umov w18, v0.b[11]
; VBITS_GE_256-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_256-NEXT: ubfiz w13, w4, #7, #1
; VBITS_GE_256-NEXT: ubfiz w4, w5, #8, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_256-NEXT: fmov w20, s5
; VBITS_GE_256-NEXT: fmov w21, s6
; VBITS_GE_256-NEXT: bfi w6, w12, #2, #1
; VBITS_GE_256-NEXT: umov w16, v0.b[12]
; VBITS_GE_256-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_256-NEXT: fmov w22, s7
; VBITS_GE_256-NEXT: orr w12, w13, w4
; VBITS_GE_256-NEXT: ubfiz w13, w3, #9, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: umov w17, v0.b[13]
; VBITS_GE_256-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_256-NEXT: fmov w23, s16
; VBITS_GE_256-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_256-NEXT: ubfiz w11, w1, #10, #1
; VBITS_GE_256-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_256-NEXT: fmov w24, s17
; VBITS_GE_256-NEXT: ubfiz w3, w20, #18, #1
; VBITS_GE_256-NEXT: ubfiz w4, w21, #19, #1
; VBITS_GE_256-NEXT: orr w12, w12, w13
; VBITS_GE_256-NEXT: ubfiz w13, w18, #11, #1
; VBITS_GE_256-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_256-NEXT: fmov w25, s18
; VBITS_GE_256-NEXT: ubfiz w1, w22, #20, #1
; VBITS_GE_256-NEXT: orr w11, w12, w11
; VBITS_GE_256-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[14]
; VBITS_GE_256-NEXT: fmov w26, s19
; VBITS_GE_256-NEXT: orr w3, w3, w4
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: ubfiz w12, w16, #12, #1
; VBITS_GE_256-NEXT: ubfiz w13, w23, #21, #1
; VBITS_GE_256-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_256-NEXT: fmov w27, s20
; VBITS_GE_256-NEXT: orr w10, w3, w1
; VBITS_GE_256-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_256-NEXT: ubfiz w9, w17, #13, #1
; VBITS_GE_256-NEXT: ubfiz w16, w24, #22, #1
; VBITS_GE_256-NEXT: umov w8, v0.b[6]
; VBITS_GE_256-NEXT: umov w15, v0.b[15]
; VBITS_GE_256-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_256-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_256-NEXT: fmov w5, s21
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w10, w10, w13
; VBITS_GE_256-NEXT: ubfiz w12, w25, #23, #1
; VBITS_GE_256-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_256-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_256-NEXT: orr w9, w11, w9
; VBITS_GE_256-NEXT: orr w10, w10, w16
; VBITS_GE_256-NEXT: ubfiz w11, w26, #24, #1
; VBITS_GE_256-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_256-NEXT: fmov w28, s22
; VBITS_GE_256-NEXT: orr w10, w10, w12
; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_256-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_256-NEXT: fmov w7, s3
; VBITS_GE_256-NEXT: fmov w29, s23
; VBITS_GE_256-NEXT: orr w10, w10, w11
; VBITS_GE_256-NEXT: ubfiz w14, w5, #26, #1
; VBITS_GE_256-NEXT: fmov w19, s4
; VBITS_GE_256-NEXT: fmov w30, s24
; VBITS_GE_256-NEXT: ubfiz w11, w15, #15, #1
; VBITS_GE_256-NEXT: bfi w6, w8, #6, #1
; VBITS_GE_256-NEXT: orr w8, w9, w12
; VBITS_GE_256-NEXT: orr w9, w10, w13
; VBITS_GE_256-NEXT: orr w9, w9, w14
; VBITS_GE_256-NEXT: ubfiz w10, w28, #27, #1
; VBITS_GE_256-NEXT: fmov w14, s2
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1
; VBITS_GE_256-NEXT: ubfiz w13, w29, #28, #1
; VBITS_GE_256-NEXT: ubfiz w12, w19, #17, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: ubfiz w10, w30, #29, #1
; VBITS_GE_256-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: ubfiz w11, w14, #30, #1
; VBITS_GE_256-NEXT: orr w8, w8, w12
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: orr w8, w6, w8
; VBITS_GE_256-NEXT: orr w9, w9, w11
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: fmov w9, s1
; VBITS_GE_256-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB13_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rb { z0.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #1
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB13_3
; VBITS_GE_256-NEXT: b .LBB13_4
; VBITS_GE_256-NEXT: .LBB13_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI13_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI13_0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB13_4
; VBITS_GE_256-NEXT: .LBB13_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: .LBB13_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB13_36
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB13_37
; VBITS_GE_256-NEXT: .LBB13_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB13_38
; VBITS_GE_256-NEXT: .LBB13_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB13_39
; VBITS_GE_256-NEXT: .LBB13_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB13_40
; VBITS_GE_256-NEXT: .LBB13_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB13_41
; VBITS_GE_256-NEXT: .LBB13_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB13_42
; VBITS_GE_256-NEXT: .LBB13_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB13_43
; VBITS_GE_256-NEXT: .LBB13_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB13_44
; VBITS_GE_256-NEXT: .LBB13_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB13_45
; VBITS_GE_256-NEXT: .LBB13_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB13_46
; VBITS_GE_256-NEXT: .LBB13_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB13_47
; VBITS_GE_256-NEXT: .LBB13_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB13_48
; VBITS_GE_256-NEXT: .LBB13_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB13_49
; VBITS_GE_256-NEXT: .LBB13_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB13_50
; VBITS_GE_256-NEXT: .LBB13_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB13_51
; VBITS_GE_256-NEXT: .LBB13_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB13_52
; VBITS_GE_256-NEXT: .LBB13_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB13_53
; VBITS_GE_256-NEXT: .LBB13_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB13_54
; VBITS_GE_256-NEXT: .LBB13_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB13_55
; VBITS_GE_256-NEXT: .LBB13_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB13_56
; VBITS_GE_256-NEXT: .LBB13_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB13_57
; VBITS_GE_256-NEXT: .LBB13_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB13_58
; VBITS_GE_256-NEXT: .LBB13_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB13_59
; VBITS_GE_256-NEXT: .LBB13_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB13_60
; VBITS_GE_256-NEXT: .LBB13_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB13_61
; VBITS_GE_256-NEXT: .LBB13_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB13_62
; VBITS_GE_256-NEXT: .LBB13_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB13_63
; VBITS_GE_256-NEXT: .LBB13_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB13_64
; VBITS_GE_256-NEXT: .LBB13_33: // %else118
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB13_35
; VBITS_GE_256-NEXT: .LBB13_34: // %cond.load121
; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w8
; VBITS_GE_256-NEXT: ldrb w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w8
; VBITS_GE_256-NEXT: .LBB13_35: // %else122
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT: add sp, sp, #112
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB13_36: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB13_6
; VBITS_GE_256-NEXT: .LBB13_37: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB13_7
; VBITS_GE_256-NEXT: .LBB13_38: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB13_8
; VBITS_GE_256-NEXT: .LBB13_39: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB13_9
; VBITS_GE_256-NEXT: .LBB13_40: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB13_10
; VBITS_GE_256-NEXT: .LBB13_41: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB13_11
; VBITS_GE_256-NEXT: .LBB13_42: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB13_12
; VBITS_GE_256-NEXT: .LBB13_43: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB13_13
; VBITS_GE_256-NEXT: .LBB13_44: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB13_14
; VBITS_GE_256-NEXT: .LBB13_45: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB13_15
; VBITS_GE_256-NEXT: .LBB13_46: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB13_16
; VBITS_GE_256-NEXT: .LBB13_47: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB13_17
; VBITS_GE_256-NEXT: .LBB13_48: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB13_18
; VBITS_GE_256-NEXT: .LBB13_49: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB13_19
; VBITS_GE_256-NEXT: .LBB13_50: // %cond.load61
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB13_20
; VBITS_GE_256-NEXT: .LBB13_51: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB13_21
; VBITS_GE_256-NEXT: .LBB13_52: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB13_22
; VBITS_GE_256-NEXT: .LBB13_53: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB13_23
; VBITS_GE_256-NEXT: .LBB13_54: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB13_24
; VBITS_GE_256-NEXT: .LBB13_55: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB13_25
; VBITS_GE_256-NEXT: .LBB13_56: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB13_26
; VBITS_GE_256-NEXT: .LBB13_57: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB13_27
; VBITS_GE_256-NEXT: .LBB13_58: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB13_28
; VBITS_GE_256-NEXT: .LBB13_59: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB13_29
; VBITS_GE_256-NEXT: .LBB13_60: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB13_30
; VBITS_GE_256-NEXT: .LBB13_61: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB13_31
; VBITS_GE_256-NEXT: .LBB13_62: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB13_32
; VBITS_GE_256-NEXT: .LBB13_63: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB13_33
; VBITS_GE_256-NEXT: .LBB13_64: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB13_34
; VBITS_GE_256-NEXT: b .LBB13_35
;
; VBITS_GE_512-LABEL: masked_load_sext_v32i8i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p1.b, vl32
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; VBITS_GE_512-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.b
; VBITS_GE_512-NEXT: umov w13, v0.b[1]
; VBITS_GE_512-NEXT: fmov w6, s0
; VBITS_GE_512-NEXT: umov w4, v0.b[7]
; VBITS_GE_512-NEXT: umov w5, v0.b[8]
; VBITS_GE_512-NEXT: umov w12, v0.b[2]
; VBITS_GE_512-NEXT: umov w3, v0.b[9]
; VBITS_GE_512-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_512-NEXT: umov w11, v0.b[3]
; VBITS_GE_512-NEXT: and w6, w6, #0x1
; VBITS_GE_512-NEXT: umov w1, v0.b[10]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_512-NEXT: bfi w6, w13, #1, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[11]
; VBITS_GE_512-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_512-NEXT: ubfiz w13, w4, #7, #1
; VBITS_GE_512-NEXT: ubfiz w4, w5, #8, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: bfi w6, w12, #2, #1
; VBITS_GE_512-NEXT: umov w16, v0.b[12]
; VBITS_GE_512-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_512-NEXT: fmov w22, s7
; VBITS_GE_512-NEXT: orr w12, w13, w4
; VBITS_GE_512-NEXT: ubfiz w13, w3, #9, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: umov w17, v0.b[13]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w23, s16
; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_512-NEXT: ubfiz w11, w1, #10, #1
; VBITS_GE_512-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w24, s17
; VBITS_GE_512-NEXT: ubfiz w3, w20, #18, #1
; VBITS_GE_512-NEXT: ubfiz w4, w21, #19, #1
; VBITS_GE_512-NEXT: orr w12, w12, w13
; VBITS_GE_512-NEXT: ubfiz w13, w18, #11, #1
; VBITS_GE_512-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w25, s18
; VBITS_GE_512-NEXT: ubfiz w1, w22, #20, #1
; VBITS_GE_512-NEXT: orr w11, w12, w11
; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: fmov w26, s19
; VBITS_GE_512-NEXT: orr w3, w3, w4
; VBITS_GE_512-NEXT: orr w11, w11, w13
; VBITS_GE_512-NEXT: ubfiz w12, w16, #12, #1
; VBITS_GE_512-NEXT: ubfiz w13, w23, #21, #1
; VBITS_GE_512-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_512-NEXT: fmov w27, s20
; VBITS_GE_512-NEXT: orr w10, w3, w1
; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_512-NEXT: ubfiz w9, w17, #13, #1
; VBITS_GE_512-NEXT: ubfiz w16, w24, #22, #1
; VBITS_GE_512-NEXT: umov w8, v0.b[6]
; VBITS_GE_512-NEXT: umov w15, v0.b[15]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w5, s21
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: orr w10, w10, w13
; VBITS_GE_512-NEXT: ubfiz w12, w25, #23, #1
; VBITS_GE_512-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: orr w10, w10, w16
; VBITS_GE_512-NEXT: ubfiz w11, w26, #24, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_512-NEXT: fmov w28, s22
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_512-NEXT: fmov w7, s3
; VBITS_GE_512-NEXT: fmov w29, s23
; VBITS_GE_512-NEXT: orr w10, w10, w11
; VBITS_GE_512-NEXT: ubfiz w14, w5, #26, #1
; VBITS_GE_512-NEXT: fmov w19, s4
; VBITS_GE_512-NEXT: fmov w30, s24
; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1
; VBITS_GE_512-NEXT: bfi w6, w8, #6, #1
; VBITS_GE_512-NEXT: orr w8, w9, w12
; VBITS_GE_512-NEXT: orr w9, w10, w13
; VBITS_GE_512-NEXT: orr w9, w9, w14
; VBITS_GE_512-NEXT: ubfiz w10, w28, #27, #1
; VBITS_GE_512-NEXT: fmov w14, s2
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: ubfiz w11, w7, #16, #1
; VBITS_GE_512-NEXT: ubfiz w13, w29, #28, #1
; VBITS_GE_512-NEXT: ubfiz w12, w19, #17, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w30, #29, #1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w11, w14, #30, #1
; VBITS_GE_512-NEXT: orr w8, w8, w12
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w8, w6, w8
; VBITS_GE_512-NEXT: orr w9, w9, w11
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB13_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rb { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #1
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB13_3
; VBITS_GE_512-NEXT: b .LBB13_4
; VBITS_GE_512-NEXT: .LBB13_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI13_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI13_0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB13_4
; VBITS_GE_512-NEXT: .LBB13_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: .LBB13_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB13_36
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB13_37
; VBITS_GE_512-NEXT: .LBB13_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB13_38
; VBITS_GE_512-NEXT: .LBB13_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB13_39
; VBITS_GE_512-NEXT: .LBB13_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB13_40
; VBITS_GE_512-NEXT: .LBB13_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB13_41
; VBITS_GE_512-NEXT: .LBB13_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB13_42
; VBITS_GE_512-NEXT: .LBB13_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB13_43
; VBITS_GE_512-NEXT: .LBB13_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB13_44
; VBITS_GE_512-NEXT: .LBB13_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB13_45
; VBITS_GE_512-NEXT: .LBB13_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB13_46
; VBITS_GE_512-NEXT: .LBB13_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB13_47
; VBITS_GE_512-NEXT: .LBB13_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB13_48
; VBITS_GE_512-NEXT: .LBB13_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB13_49
; VBITS_GE_512-NEXT: .LBB13_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB13_50
; VBITS_GE_512-NEXT: .LBB13_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB13_51
; VBITS_GE_512-NEXT: .LBB13_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB13_52
; VBITS_GE_512-NEXT: .LBB13_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB13_53
; VBITS_GE_512-NEXT: .LBB13_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB13_54
; VBITS_GE_512-NEXT: .LBB13_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB13_55
; VBITS_GE_512-NEXT: .LBB13_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB13_56
; VBITS_GE_512-NEXT: .LBB13_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB13_57
; VBITS_GE_512-NEXT: .LBB13_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB13_58
; VBITS_GE_512-NEXT: .LBB13_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB13_59
; VBITS_GE_512-NEXT: .LBB13_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB13_60
; VBITS_GE_512-NEXT: .LBB13_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB13_61
; VBITS_GE_512-NEXT: .LBB13_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB13_62
; VBITS_GE_512-NEXT: .LBB13_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB13_63
; VBITS_GE_512-NEXT: .LBB13_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB13_64
; VBITS_GE_512-NEXT: .LBB13_33: // %else118
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB13_35
; VBITS_GE_512-NEXT: .LBB13_34: // %cond.load121
; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w8
; VBITS_GE_512-NEXT: ldrb w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w8
; VBITS_GE_512-NEXT: .LBB13_35: // %else122
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB13_36: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB13_6
; VBITS_GE_512-NEXT: .LBB13_37: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB13_7
; VBITS_GE_512-NEXT: .LBB13_38: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB13_8
; VBITS_GE_512-NEXT: .LBB13_39: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB13_9
; VBITS_GE_512-NEXT: .LBB13_40: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB13_10
; VBITS_GE_512-NEXT: .LBB13_41: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB13_11
; VBITS_GE_512-NEXT: .LBB13_42: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB13_12
; VBITS_GE_512-NEXT: .LBB13_43: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB13_13
; VBITS_GE_512-NEXT: .LBB13_44: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB13_14
; VBITS_GE_512-NEXT: .LBB13_45: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB13_15
; VBITS_GE_512-NEXT: .LBB13_46: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB13_16
; VBITS_GE_512-NEXT: .LBB13_47: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB13_17
; VBITS_GE_512-NEXT: .LBB13_48: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB13_18
; VBITS_GE_512-NEXT: .LBB13_49: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB13_19
; VBITS_GE_512-NEXT: .LBB13_50: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB13_20
; VBITS_GE_512-NEXT: .LBB13_51: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB13_21
; VBITS_GE_512-NEXT: .LBB13_52: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB13_22
; VBITS_GE_512-NEXT: .LBB13_53: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB13_23
; VBITS_GE_512-NEXT: .LBB13_54: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB13_24
; VBITS_GE_512-NEXT: .LBB13_55: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB13_25
; VBITS_GE_512-NEXT: .LBB13_56: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB13_26
; VBITS_GE_512-NEXT: .LBB13_57: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB13_27
; VBITS_GE_512-NEXT: .LBB13_58: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB13_28
; VBITS_GE_512-NEXT: .LBB13_59: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB13_29
; VBITS_GE_512-NEXT: .LBB13_60: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB13_30
; VBITS_GE_512-NEXT: .LBB13_61: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB13_31
; VBITS_GE_512-NEXT: .LBB13_62: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB13_32
; VBITS_GE_512-NEXT: .LBB13_63: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB13_33
; VBITS_GE_512-NEXT: .LBB13_64: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB13_34
; VBITS_GE_512-NEXT: b .LBB13_35
;
; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl32
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr %c
ret void
}
; Expand-load of <16 x i8>, sign-extended to <16 x i32> and stored to %c.
; The lane mask is (%bp[i] == 0). Enabled lanes are filled from consecutive
; bytes starting at %ap via llvm.masked.expandload; the passthru is poison.
; Expected codegen, per the assertions below:
;  - without sve2p2, the mask is packed into w8 and every lane gets its own
;    tbz/tbnz-guarded byte load, with x0 post-incremented after each load
;    except the one for the last lane;
;  - with sve2p2, cntp + whilelo build a predicate for one contiguous ld1b
;    and `expand` then distributes the loaded bytes to the enabled lanes.
; Runs with a 256-bit minimum vector length store the widened result as two
; vl8 halves; runs with a 512-bit or larger minimum use a single vl16 store.
; NOTE(review): the name says "masked_load" but the body calls
; llvm.masked.expandload - confirm the naming is intentional.
define void @masked_load_sext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI14_0
; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0
; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB14_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB14_3
; VBITS_GE_256-NEXT: b .LBB14_4
; VBITS_GE_256-NEXT: .LBB14_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB14_4
; VBITS_GE_256-NEXT: .LBB14_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB14_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB14_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB14_21
; VBITS_GE_256-NEXT: .LBB14_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB14_22
; VBITS_GE_256-NEXT: .LBB14_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB14_23
; VBITS_GE_256-NEXT: .LBB14_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB14_24
; VBITS_GE_256-NEXT: .LBB14_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB14_25
; VBITS_GE_256-NEXT: .LBB14_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB14_26
; VBITS_GE_256-NEXT: .LBB14_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB14_27
; VBITS_GE_256-NEXT: .LBB14_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB14_28
; VBITS_GE_256-NEXT: .LBB14_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB14_29
; VBITS_GE_256-NEXT: .LBB14_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB14_30
; VBITS_GE_256-NEXT: .LBB14_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB14_31
; VBITS_GE_256-NEXT: .LBB14_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB14_32
; VBITS_GE_256-NEXT: .LBB14_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB14_19
; VBITS_GE_256-NEXT: .LBB14_18: // %cond.load57
; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_256-NEXT: .LBB14_19: // %else58
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB14_20: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB14_6
; VBITS_GE_256-NEXT: .LBB14_21: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB14_7
; VBITS_GE_256-NEXT: .LBB14_22: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB14_8
; VBITS_GE_256-NEXT: .LBB14_23: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB14_9
; VBITS_GE_256-NEXT: .LBB14_24: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB14_10
; VBITS_GE_256-NEXT: .LBB14_25: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB14_11
; VBITS_GE_256-NEXT: .LBB14_26: // %cond.load29
; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB14_12
; VBITS_GE_256-NEXT: .LBB14_27: // %cond.load33
; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB14_13
; VBITS_GE_256-NEXT: .LBB14_28: // %cond.load37
; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB14_14
; VBITS_GE_256-NEXT: .LBB14_29: // %cond.load41
; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB14_15
; VBITS_GE_256-NEXT: .LBB14_30: // %cond.load45
; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB14_16
; VBITS_GE_256-NEXT: .LBB14_31: // %cond.load49
; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB14_17
; VBITS_GE_256-NEXT: .LBB14_32: // %cond.load53
; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB14_18
; VBITS_GE_256-NEXT: b .LBB14_19
;
; VBITS_GE_512-LABEL: masked_load_sext_v16i8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI14_0
; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; VBITS_GE_512-NEXT: cmeq v0.16b, v0.16b, #0
; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_512-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: addv h0, v0.8h
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB14_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB14_3
; VBITS_GE_512-NEXT: b .LBB14_4
; VBITS_GE_512-NEXT: .LBB14_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB14_4
; VBITS_GE_512-NEXT: .LBB14_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB14_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB14_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB14_21
; VBITS_GE_512-NEXT: .LBB14_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB14_22
; VBITS_GE_512-NEXT: .LBB14_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB14_23
; VBITS_GE_512-NEXT: .LBB14_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB14_24
; VBITS_GE_512-NEXT: .LBB14_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB14_25
; VBITS_GE_512-NEXT: .LBB14_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB14_26
; VBITS_GE_512-NEXT: .LBB14_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB14_27
; VBITS_GE_512-NEXT: .LBB14_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB14_28
; VBITS_GE_512-NEXT: .LBB14_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB14_29
; VBITS_GE_512-NEXT: .LBB14_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB14_30
; VBITS_GE_512-NEXT: .LBB14_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB14_31
; VBITS_GE_512-NEXT: .LBB14_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB14_32
; VBITS_GE_512-NEXT: .LBB14_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB14_19
; VBITS_GE_512-NEXT: .LBB14_18: // %cond.load57
; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_512-NEXT: .LBB14_19: // %else58
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB14_20: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB14_6
; VBITS_GE_512-NEXT: .LBB14_21: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB14_7
; VBITS_GE_512-NEXT: .LBB14_22: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB14_8
; VBITS_GE_512-NEXT: .LBB14_23: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB14_9
; VBITS_GE_512-NEXT: .LBB14_24: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB14_10
; VBITS_GE_512-NEXT: .LBB14_25: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB14_11
; VBITS_GE_512-NEXT: .LBB14_26: // %cond.load29
; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB14_12
; VBITS_GE_512-NEXT: .LBB14_27: // %cond.load33
; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB14_13
; VBITS_GE_512-NEXT: .LBB14_28: // %cond.load37
; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB14_14
; VBITS_GE_512-NEXT: .LBB14_29: // %cond.load41
; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB14_15
; VBITS_GE_512-NEXT: .LBB14_30: // %cond.load45
; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB14_16
; VBITS_GE_512-NEXT: .LBB14_31: // %cond.load49
; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB14_17
; VBITS_GE_512-NEXT: .LBB14_32: // %cond.load53
; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB14_18
; VBITS_GE_512-NEXT: b .LBB14_19
;
; CHECK-EXPAND-LABEL: masked_load_sext_v16i8i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl16
; CHECK-EXPAND-NEXT: ldr q0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
; Mask source: a lane is enabled when its byte loaded from %bp is zero.
%b = load <16 x i8>, ptr %bp
%mask = icmp eq <16 x i8> %b, zeroinitializer
; Enabled lanes take consecutive bytes starting at %ap; the passthru is poison.
%load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison)
; Widen i8 -> i32 with sign extension and write all 16 lanes to %c.
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expand-load of <8 x i8>, sign-extended to <8 x i64> and stored to %c.
; The lane mask is (%bp[i] == 0). Enabled lanes are filled from consecutive
; bytes starting at %ap via llvm.masked.expandload; the passthru is poison.
; Expected codegen, per the assertions below:
;  - without sve2p2, the 8-bit mask is reduced into w8 (and + addv) and every
;    lane gets its own tbz/tbnz-guarded byte load, with x0 post-incremented
;    after each load except the one for the last lane;
;  - with sve2p2, cntp + whilelo build a predicate for one contiguous ld1b
;    and `expand` then distributes the loaded bytes to the enabled lanes.
; Runs with a 256-bit minimum vector length widen via sshll + sunpklo and
; store two vl4 halves; runs with a 512-bit or larger minimum use a sunpklo
; chain and a single vl8 store.
; NOTE(review): the name says "masked_load" but the body calls
; llvm.masked.expandload - confirm the naming is intentional.
define void @masked_load_sext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI15_0
; VBITS_GE_256-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0
; VBITS_GE_256-NEXT: and v0.8b, v0.8b, v1.8b
; VBITS_GE_256-NEXT: addv b0, v0.8b
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB15_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB15_3
; VBITS_GE_256-NEXT: b .LBB15_4
; VBITS_GE_256-NEXT: .LBB15_2:
; VBITS_GE_256-NEXT: // implicit-def: $d0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB15_4
; VBITS_GE_256-NEXT: .LBB15_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB15_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB15_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB15_13
; VBITS_GE_256-NEXT: .LBB15_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB15_14
; VBITS_GE_256-NEXT: .LBB15_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB15_15
; VBITS_GE_256-NEXT: .LBB15_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB15_16
; VBITS_GE_256-NEXT: .LBB15_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB15_11
; VBITS_GE_256-NEXT: .LBB15_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_256-NEXT: .LBB15_11: // %else26
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB15_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB15_6
; VBITS_GE_256-NEXT: .LBB15_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB15_7
; VBITS_GE_256-NEXT: .LBB15_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB15_8
; VBITS_GE_256-NEXT: .LBB15_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB15_9
; VBITS_GE_256-NEXT: .LBB15_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB15_10
; VBITS_GE_256-NEXT: b .LBB15_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr d0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI15_0
; VBITS_GE_512-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
; VBITS_GE_512-NEXT: cmeq v0.8b, v0.8b, #0
; VBITS_GE_512-NEXT: and v0.8b, v0.8b, v1.8b
; VBITS_GE_512-NEXT: addv b0, v0.8b
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB15_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB15_3
; VBITS_GE_512-NEXT: b .LBB15_4
; VBITS_GE_512-NEXT: .LBB15_2:
; VBITS_GE_512-NEXT: // implicit-def: $d0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB15_4
; VBITS_GE_512-NEXT: .LBB15_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB15_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB15_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB15_13
; VBITS_GE_512-NEXT: .LBB15_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB15_14
; VBITS_GE_512-NEXT: .LBB15_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB15_15
; VBITS_GE_512-NEXT: .LBB15_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB15_16
; VBITS_GE_512-NEXT: .LBB15_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB15_11
; VBITS_GE_512-NEXT: .LBB15_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_512-NEXT: .LBB15_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB15_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB15_6
; VBITS_GE_512-NEXT: .LBB15_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB15_7
; VBITS_GE_512-NEXT: .LBB15_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB15_8
; VBITS_GE_512-NEXT: .LBB15_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB15_9
; VBITS_GE_512-NEXT: .LBB15_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB15_10
; VBITS_GE_512-NEXT: b .LBB15_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl8
; CHECK-EXPAND-NEXT: ldr d0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Mask source: a lane is enabled when its byte loaded from %bp is zero.
%b = load <8 x i8>, ptr %bp
%mask = icmp eq <8 x i8> %b, zeroinitializer
; Enabled lanes take consecutive bytes starting at %ap; the passthru is poison.
%load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison)
; Widen i8 -> i64 with sign extension and write all 8 lanes to %c.
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
; Expanding load of <16 x i16> whose result is sign-extended to <16 x i32>.
; The mask is built by comparing the <16 x i16> value loaded from %bp against
; zero; the expanding load reads from %ap with a poison passthru, and the
; extended vector is stored to %c.
; In the expected output below, the runs without sve2p2 test the mask one bit
; at a time and load a single halfword per set bit, while the sve2p2 run uses
; cntp + whilelo + one contiguous ld1h + expand. The 256-bit-minimum runs
; store the result as two halves; the wider runs store it with one st1w.
; NOTE(review): the name says "masked_load" but the intrinsic exercised here is
; llvm.masked.expandload - confirm the naming is intentional.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
define void @masked_load_sext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; VBITS_GE_256-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: umov w11, v0.b[7]
; VBITS_GE_256-NEXT: umov w12, v0.b[8]
; VBITS_GE_256-NEXT: umov w13, v0.b[3]
; VBITS_GE_256-NEXT: umov w14, v0.b[4]
; VBITS_GE_256-NEXT: umov w15, v0.b[10]
; VBITS_GE_256-NEXT: umov w16, v0.b[5]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[9]
; VBITS_GE_256-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_256-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[11]
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[13]
; VBITS_GE_256-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[12]
; VBITS_GE_256-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_256-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[14]
; VBITS_GE_256-NEXT: orr w9, w11, w9
; VBITS_GE_256-NEXT: umov w11, v0.b[6]
; VBITS_GE_256-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_256-NEXT: orr w9, w9, w15
; VBITS_GE_256-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_256-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[15]
; VBITS_GE_256-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_256-NEXT: and w8, w9, #0xffff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB16_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rh { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #2
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB16_3
; VBITS_GE_256-NEXT: b .LBB16_4
; VBITS_GE_256-NEXT: .LBB16_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI16_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI16_0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB16_4
; VBITS_GE_256-NEXT: .LBB16_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: .LBB16_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB16_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB16_21
; VBITS_GE_256-NEXT: .LBB16_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB16_22
; VBITS_GE_256-NEXT: .LBB16_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB16_23
; VBITS_GE_256-NEXT: .LBB16_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB16_24
; VBITS_GE_256-NEXT: .LBB16_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB16_25
; VBITS_GE_256-NEXT: .LBB16_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB16_26
; VBITS_GE_256-NEXT: .LBB16_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB16_27
; VBITS_GE_256-NEXT: .LBB16_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB16_28
; VBITS_GE_256-NEXT: .LBB16_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB16_29
; VBITS_GE_256-NEXT: .LBB16_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB16_30
; VBITS_GE_256-NEXT: .LBB16_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB16_31
; VBITS_GE_256-NEXT: .LBB16_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB16_32
; VBITS_GE_256-NEXT: .LBB16_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB16_19
; VBITS_GE_256-NEXT: .LBB16_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w8
; VBITS_GE_256-NEXT: ldrh w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w8
; VBITS_GE_256-NEXT: .LBB16_19: // %else58
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB16_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB16_6
; VBITS_GE_256-NEXT: .LBB16_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB16_7
; VBITS_GE_256-NEXT: .LBB16_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB16_8
; VBITS_GE_256-NEXT: .LBB16_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB16_9
; VBITS_GE_256-NEXT: .LBB16_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB16_10
; VBITS_GE_256-NEXT: .LBB16_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB16_11
; VBITS_GE_256-NEXT: .LBB16_26: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB16_12
; VBITS_GE_256-NEXT: .LBB16_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB16_13
; VBITS_GE_256-NEXT: .LBB16_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB16_14
; VBITS_GE_256-NEXT: .LBB16_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB16_15
; VBITS_GE_256-NEXT: .LBB16_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB16_16
; VBITS_GE_256-NEXT: .LBB16_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB16_17
; VBITS_GE_256-NEXT: .LBB16_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB16_18
; VBITS_GE_256-NEXT: b .LBB16_19
;
; VBITS_GE_512-LABEL: masked_load_sext_v16i16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.h, vl16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; VBITS_GE_512-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB16_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rh { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #2
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB16_3
; VBITS_GE_512-NEXT: b .LBB16_4
; VBITS_GE_512-NEXT: .LBB16_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI16_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI16_0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB16_4
; VBITS_GE_512-NEXT: .LBB16_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: .LBB16_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB16_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB16_21
; VBITS_GE_512-NEXT: .LBB16_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB16_22
; VBITS_GE_512-NEXT: .LBB16_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB16_23
; VBITS_GE_512-NEXT: .LBB16_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB16_24
; VBITS_GE_512-NEXT: .LBB16_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB16_25
; VBITS_GE_512-NEXT: .LBB16_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB16_26
; VBITS_GE_512-NEXT: .LBB16_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB16_27
; VBITS_GE_512-NEXT: .LBB16_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB16_28
; VBITS_GE_512-NEXT: .LBB16_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB16_29
; VBITS_GE_512-NEXT: .LBB16_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB16_30
; VBITS_GE_512-NEXT: .LBB16_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB16_31
; VBITS_GE_512-NEXT: .LBB16_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB16_32
; VBITS_GE_512-NEXT: .LBB16_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB16_19
; VBITS_GE_512-NEXT: .LBB16_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w8
; VBITS_GE_512-NEXT: ldrh w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w8
; VBITS_GE_512-NEXT: .LBB16_19: // %else58
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB16_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB16_6
; VBITS_GE_512-NEXT: .LBB16_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB16_7
; VBITS_GE_512-NEXT: .LBB16_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB16_8
; VBITS_GE_512-NEXT: .LBB16_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB16_9
; VBITS_GE_512-NEXT: .LBB16_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB16_10
; VBITS_GE_512-NEXT: .LBB16_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB16_11
; VBITS_GE_512-NEXT: .LBB16_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB16_12
; VBITS_GE_512-NEXT: .LBB16_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB16_13
; VBITS_GE_512-NEXT: .LBB16_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB16_14
; VBITS_GE_512-NEXT: .LBB16_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB16_15
; VBITS_GE_512-NEXT: .LBB16_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB16_16
; VBITS_GE_512-NEXT: .LBB16_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB16_17
; VBITS_GE_512-NEXT: .LBB16_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB16_18
; VBITS_GE_512-NEXT: b .LBB16_19
;
; CHECK-EXPAND-LABEL: masked_load_sext_v16i16i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
; The mask source vector comes from %bp.
%b = load <16 x i16>, ptr %bp
; A lane is enabled where the corresponding element of %b is zero.
%mask = icmp eq <16 x i16> %b, zeroinitializer
; Expanding load from %ap under %mask; the passthru operand is poison.
%load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison)
; Widen every i16 element to i32 with sign extension, then store to %c.
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i16> whose result is sign-extended to <8 x i64>.
; The mask is built by comparing the <8 x i16> value loaded from %bp against
; zero; the expanding load reads from %ap with a poison passthru, and the
; extended vector is stored to %c.
; In the expected output below, the runs without sve2p2 turn the mask into a
; scalar bitmask and insert one halfword per set bit with lane-indexed ld1,
; while the sve2p2 run uses cntp + whilelo + one contiguous ld1h + expand.
; The 256-bit-minimum runs store the result as two halves; the wider runs
; store it with one st1d.
; NOTE(review): the name says "masked_load" but the intrinsic exercised here is
; llvm.masked.expandload - confirm the naming is intentional.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
define void @masked_load_sext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI17_0
; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB17_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB17_3
; VBITS_GE_256-NEXT: b .LBB17_4
; VBITS_GE_256-NEXT: .LBB17_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB17_4
; VBITS_GE_256-NEXT: .LBB17_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_256-NEXT: .LBB17_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB17_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB17_13
; VBITS_GE_256-NEXT: .LBB17_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB17_14
; VBITS_GE_256-NEXT: .LBB17_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB17_15
; VBITS_GE_256-NEXT: .LBB17_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB17_16
; VBITS_GE_256-NEXT: .LBB17_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB17_11
; VBITS_GE_256-NEXT: .LBB17_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_256-NEXT: .LBB17_11: // %else26
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB17_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB17_6
; VBITS_GE_256-NEXT: .LBB17_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB17_7
; VBITS_GE_256-NEXT: .LBB17_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB17_8
; VBITS_GE_256-NEXT: .LBB17_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB17_9
; VBITS_GE_256-NEXT: .LBB17_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB17_10
; VBITS_GE_256-NEXT: b .LBB17_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i16i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI17_0
; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; VBITS_GE_512-NEXT: cmeq v0.8h, v0.8h, #0
; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: addv h0, v0.8h
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB17_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB17_3
; VBITS_GE_512-NEXT: b .LBB17_4
; VBITS_GE_512-NEXT: .LBB17_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB17_4
; VBITS_GE_512-NEXT: .LBB17_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_512-NEXT: .LBB17_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB17_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB17_13
; VBITS_GE_512-NEXT: .LBB17_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB17_14
; VBITS_GE_512-NEXT: .LBB17_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB17_15
; VBITS_GE_512-NEXT: .LBB17_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB17_16
; VBITS_GE_512-NEXT: .LBB17_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB17_11
; VBITS_GE_512-NEXT: .LBB17_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_512-NEXT: .LBB17_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB17_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB17_6
; VBITS_GE_512-NEXT: .LBB17_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB17_7
; VBITS_GE_512-NEXT: .LBB17_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB17_8
; VBITS_GE_512-NEXT: .LBB17_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB17_9
; VBITS_GE_512-NEXT: .LBB17_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB17_10
; VBITS_GE_512-NEXT: b .LBB17_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i16i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl8
; CHECK-EXPAND-NEXT: ldr q0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; The mask source vector comes from %bp.
%b = load <8 x i16>, ptr %bp
; A lane is enabled where the corresponding element of %b is zero.
%mask = icmp eq <8 x i16> %b, zeroinitializer
; Expanding load from %ap under %mask; the passthru operand is poison.
%load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison)
; Widen every i16 element to i64 with sign extension, then store to %c.
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB18_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB18_3
; VBITS_GE_256-NEXT: b .LBB18_4
; VBITS_GE_256-NEXT: .LBB18_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI18_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI18_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB18_4
; VBITS_GE_256-NEXT: .LBB18_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: .LBB18_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB18_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB18_13
; VBITS_GE_256-NEXT: .LBB18_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB18_14
; VBITS_GE_256-NEXT: .LBB18_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB18_15
; VBITS_GE_256-NEXT: .LBB18_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB18_16
; VBITS_GE_256-NEXT: .LBB18_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB18_11
; VBITS_GE_256-NEXT: .LBB18_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_256-NEXT: .LBB18_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB18_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB18_6
; VBITS_GE_256-NEXT: .LBB18_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB18_7
; VBITS_GE_256-NEXT: .LBB18_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB18_8
; VBITS_GE_256-NEXT: .LBB18_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB18_9
; VBITS_GE_256-NEXT: .LBB18_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB18_10
; VBITS_GE_256-NEXT: b .LBB18_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i32i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB18_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB18_3
; VBITS_GE_512-NEXT: b .LBB18_4
; VBITS_GE_512-NEXT: .LBB18_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI18_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI18_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB18_4
; VBITS_GE_512-NEXT: .LBB18_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: .LBB18_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB18_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB18_13
; VBITS_GE_512-NEXT: .LBB18_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB18_14
; VBITS_GE_512-NEXT: .LBB18_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB18_15
; VBITS_GE_512-NEXT: .LBB18_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB18_16
; VBITS_GE_512-NEXT: .LBB18_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB18_11
; VBITS_GE_512-NEXT: .LBB18_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_512-NEXT: .LBB18_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB18_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB18_6
; VBITS_GE_512-NEXT: .LBB18_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB18_7
; VBITS_GE_512-NEXT: .LBB18_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB18_8
; VBITS_GE_512-NEXT: .LBB18_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB18_9
; VBITS_GE_512-NEXT: .LBB18_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB18_10
; VBITS_GE_512-NEXT: b .LBB18_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
%b = load <8 x i32>, ptr %bp
%mask = icmp eq <8 x i32> %b, zeroinitializer
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v32i8i16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #112
; VBITS_GE_256-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_256-NEXT: .cfi_offset w19, -8
; VBITS_GE_256-NEXT: .cfi_offset w20, -16
; VBITS_GE_256-NEXT: .cfi_offset w21, -24
; VBITS_GE_256-NEXT: .cfi_offset w22, -32
; VBITS_GE_256-NEXT: .cfi_offset w23, -40
; VBITS_GE_256-NEXT: .cfi_offset w24, -48
; VBITS_GE_256-NEXT: .cfi_offset w25, -56
; VBITS_GE_256-NEXT: .cfi_offset w26, -64
; VBITS_GE_256-NEXT: .cfi_offset w27, -72
; VBITS_GE_256-NEXT: .cfi_offset w28, -80
; VBITS_GE_256-NEXT: .cfi_offset w30, -88
; VBITS_GE_256-NEXT: .cfi_offset w29, -96
; VBITS_GE_256-NEXT: ptrue p1.b, vl32
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; VBITS_GE_256-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.b
; VBITS_GE_256-NEXT: umov w13, v0.b[1]
; VBITS_GE_256-NEXT: fmov w6, s0
; VBITS_GE_256-NEXT: umov w4, v0.b[7]
; VBITS_GE_256-NEXT: umov w5, v0.b[8]
; VBITS_GE_256-NEXT: umov w12, v0.b[2]
; VBITS_GE_256-NEXT: umov w3, v0.b[9]
; VBITS_GE_256-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_256-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_256-NEXT: umov w11, v0.b[3]
; VBITS_GE_256-NEXT: and w6, w6, #0x1
; VBITS_GE_256-NEXT: umov w1, v0.b[10]
; VBITS_GE_256-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_256-NEXT: bfi w6, w13, #1, #1
; VBITS_GE_256-NEXT: umov w18, v0.b[11]
; VBITS_GE_256-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_256-NEXT: ubfiz w13, w4, #7, #1
; VBITS_GE_256-NEXT: ubfiz w4, w5, #8, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_256-NEXT: fmov w20, s5
; VBITS_GE_256-NEXT: fmov w21, s6
; VBITS_GE_256-NEXT: bfi w6, w12, #2, #1
; VBITS_GE_256-NEXT: umov w16, v0.b[12]
; VBITS_GE_256-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_256-NEXT: fmov w22, s7
; VBITS_GE_256-NEXT: orr w12, w13, w4
; VBITS_GE_256-NEXT: ubfiz w13, w3, #9, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: umov w17, v0.b[13]
; VBITS_GE_256-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_256-NEXT: fmov w23, s16
; VBITS_GE_256-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_256-NEXT: ubfiz w11, w1, #10, #1
; VBITS_GE_256-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_256-NEXT: fmov w24, s17
; VBITS_GE_256-NEXT: ubfiz w3, w20, #18, #1
; VBITS_GE_256-NEXT: ubfiz w4, w21, #19, #1
; VBITS_GE_256-NEXT: orr w12, w12, w13
; VBITS_GE_256-NEXT: ubfiz w13, w18, #11, #1
; VBITS_GE_256-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_256-NEXT: fmov w25, s18
; VBITS_GE_256-NEXT: ubfiz w1, w22, #20, #1
; VBITS_GE_256-NEXT: orr w11, w12, w11
; VBITS_GE_256-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[14]
; VBITS_GE_256-NEXT: fmov w26, s19
; VBITS_GE_256-NEXT: orr w3, w3, w4
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: ubfiz w12, w16, #12, #1
; VBITS_GE_256-NEXT: ubfiz w13, w23, #21, #1
; VBITS_GE_256-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_256-NEXT: fmov w27, s20
; VBITS_GE_256-NEXT: orr w10, w3, w1
; VBITS_GE_256-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_256-NEXT: ubfiz w9, w17, #13, #1
; VBITS_GE_256-NEXT: ubfiz w16, w24, #22, #1
; VBITS_GE_256-NEXT: umov w8, v0.b[6]
; VBITS_GE_256-NEXT: umov w15, v0.b[15]
; VBITS_GE_256-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_256-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_256-NEXT: fmov w5, s21
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w10, w10, w13
; VBITS_GE_256-NEXT: ubfiz w12, w25, #23, #1
; VBITS_GE_256-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_256-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_256-NEXT: orr w9, w11, w9
; VBITS_GE_256-NEXT: orr w10, w10, w16
; VBITS_GE_256-NEXT: ubfiz w11, w26, #24, #1
; VBITS_GE_256-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_256-NEXT: fmov w28, s22
; VBITS_GE_256-NEXT: orr w10, w10, w12
; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_256-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_256-NEXT: fmov w7, s3
; VBITS_GE_256-NEXT: fmov w29, s23
; VBITS_GE_256-NEXT: orr w10, w10, w11
; VBITS_GE_256-NEXT: ubfiz w14, w5, #26, #1
; VBITS_GE_256-NEXT: fmov w19, s4
; VBITS_GE_256-NEXT: fmov w30, s24
; VBITS_GE_256-NEXT: ubfiz w11, w15, #15, #1
; VBITS_GE_256-NEXT: bfi w6, w8, #6, #1
; VBITS_GE_256-NEXT: orr w8, w9, w12
; VBITS_GE_256-NEXT: orr w9, w10, w13
; VBITS_GE_256-NEXT: orr w9, w9, w14
; VBITS_GE_256-NEXT: ubfiz w10, w28, #27, #1
; VBITS_GE_256-NEXT: fmov w14, s2
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: ubfiz w11, w7, #16, #1
; VBITS_GE_256-NEXT: ubfiz w13, w29, #28, #1
; VBITS_GE_256-NEXT: ubfiz w12, w19, #17, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: ubfiz w10, w30, #29, #1
; VBITS_GE_256-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_256-NEXT: orr w8, w8, w11
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: ubfiz w11, w14, #30, #1
; VBITS_GE_256-NEXT: orr w8, w8, w12
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: orr w8, w6, w8
; VBITS_GE_256-NEXT: orr w9, w9, w11
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: fmov w9, s1
; VBITS_GE_256-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB19_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rb { z0.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #1
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB19_3
; VBITS_GE_256-NEXT: b .LBB19_4
; VBITS_GE_256-NEXT: .LBB19_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI19_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI19_0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB19_4
; VBITS_GE_256-NEXT: .LBB19_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: .LBB19_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB19_36
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB19_37
; VBITS_GE_256-NEXT: .LBB19_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB19_38
; VBITS_GE_256-NEXT: .LBB19_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB19_39
; VBITS_GE_256-NEXT: .LBB19_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB19_40
; VBITS_GE_256-NEXT: .LBB19_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB19_41
; VBITS_GE_256-NEXT: .LBB19_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB19_42
; VBITS_GE_256-NEXT: .LBB19_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB19_43
; VBITS_GE_256-NEXT: .LBB19_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB19_44
; VBITS_GE_256-NEXT: .LBB19_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB19_45
; VBITS_GE_256-NEXT: .LBB19_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB19_46
; VBITS_GE_256-NEXT: .LBB19_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB19_47
; VBITS_GE_256-NEXT: .LBB19_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB19_48
; VBITS_GE_256-NEXT: .LBB19_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB19_49
; VBITS_GE_256-NEXT: .LBB19_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB19_50
; VBITS_GE_256-NEXT: .LBB19_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB19_51
; VBITS_GE_256-NEXT: .LBB19_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB19_52
; VBITS_GE_256-NEXT: .LBB19_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB19_53
; VBITS_GE_256-NEXT: .LBB19_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB19_54
; VBITS_GE_256-NEXT: .LBB19_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB19_55
; VBITS_GE_256-NEXT: .LBB19_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB19_56
; VBITS_GE_256-NEXT: .LBB19_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB19_57
; VBITS_GE_256-NEXT: .LBB19_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB19_58
; VBITS_GE_256-NEXT: .LBB19_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB19_59
; VBITS_GE_256-NEXT: .LBB19_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB19_60
; VBITS_GE_256-NEXT: .LBB19_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB19_61
; VBITS_GE_256-NEXT: .LBB19_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB19_62
; VBITS_GE_256-NEXT: .LBB19_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB19_63
; VBITS_GE_256-NEXT: .LBB19_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB19_64
; VBITS_GE_256-NEXT: .LBB19_33: // %else118
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB19_35
; VBITS_GE_256-NEXT: .LBB19_34: // %cond.load121
; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w8
; VBITS_GE_256-NEXT: ldrb w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w8
; VBITS_GE_256-NEXT: .LBB19_35: // %else122
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_256-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT: add sp, sp, #112
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB19_36: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB19_6
; VBITS_GE_256-NEXT: .LBB19_37: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB19_7
; VBITS_GE_256-NEXT: .LBB19_38: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB19_8
; VBITS_GE_256-NEXT: .LBB19_39: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB19_9
; VBITS_GE_256-NEXT: .LBB19_40: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB19_10
; VBITS_GE_256-NEXT: .LBB19_41: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB19_11
; VBITS_GE_256-NEXT: .LBB19_42: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB19_12
; VBITS_GE_256-NEXT: .LBB19_43: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB19_13
; VBITS_GE_256-NEXT: .LBB19_44: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB19_14
; VBITS_GE_256-NEXT: .LBB19_45: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB19_15
; VBITS_GE_256-NEXT: .LBB19_46: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB19_16
; VBITS_GE_256-NEXT: .LBB19_47: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB19_17
; VBITS_GE_256-NEXT: .LBB19_48: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB19_18
; VBITS_GE_256-NEXT: .LBB19_49: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB19_19
; VBITS_GE_256-NEXT: .LBB19_50: // %cond.load61
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB19_20
; VBITS_GE_256-NEXT: .LBB19_51: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB19_21
; VBITS_GE_256-NEXT: .LBB19_52: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB19_22
; VBITS_GE_256-NEXT: .LBB19_53: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB19_23
; VBITS_GE_256-NEXT: .LBB19_54: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB19_24
; VBITS_GE_256-NEXT: .LBB19_55: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB19_25
; VBITS_GE_256-NEXT: .LBB19_56: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB19_26
; VBITS_GE_256-NEXT: .LBB19_57: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB19_27
; VBITS_GE_256-NEXT: .LBB19_58: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB19_28
; VBITS_GE_256-NEXT: .LBB19_59: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB19_29
; VBITS_GE_256-NEXT: .LBB19_60: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB19_30
; VBITS_GE_256-NEXT: .LBB19_61: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB19_31
; VBITS_GE_256-NEXT: .LBB19_62: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB19_32
; VBITS_GE_256-NEXT: .LBB19_63: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB19_33
; VBITS_GE_256-NEXT: .LBB19_64: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB19_34
; VBITS_GE_256-NEXT: b .LBB19_35
;
; VBITS_GE_512-LABEL: masked_load_zext_v32i8i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p1.b, vl32
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; VBITS_GE_512-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.b
; VBITS_GE_512-NEXT: umov w13, v0.b[1]
; VBITS_GE_512-NEXT: fmov w6, s0
; VBITS_GE_512-NEXT: umov w4, v0.b[7]
; VBITS_GE_512-NEXT: umov w5, v0.b[8]
; VBITS_GE_512-NEXT: umov w12, v0.b[2]
; VBITS_GE_512-NEXT: umov w3, v0.b[9]
; VBITS_GE_512-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_512-NEXT: umov w11, v0.b[3]
; VBITS_GE_512-NEXT: and w6, w6, #0x1
; VBITS_GE_512-NEXT: umov w1, v0.b[10]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_512-NEXT: bfi w6, w13, #1, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[11]
; VBITS_GE_512-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_512-NEXT: ubfiz w13, w4, #7, #1
; VBITS_GE_512-NEXT: ubfiz w4, w5, #8, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: bfi w6, w12, #2, #1
; VBITS_GE_512-NEXT: umov w16, v0.b[12]
; VBITS_GE_512-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_512-NEXT: fmov w22, s7
; VBITS_GE_512-NEXT: orr w12, w13, w4
; VBITS_GE_512-NEXT: ubfiz w13, w3, #9, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: umov w17, v0.b[13]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w23, s16
; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_512-NEXT: ubfiz w11, w1, #10, #1
; VBITS_GE_512-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w24, s17
; VBITS_GE_512-NEXT: ubfiz w3, w20, #18, #1
; VBITS_GE_512-NEXT: ubfiz w4, w21, #19, #1
; VBITS_GE_512-NEXT: orr w12, w12, w13
; VBITS_GE_512-NEXT: ubfiz w13, w18, #11, #1
; VBITS_GE_512-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w25, s18
; VBITS_GE_512-NEXT: ubfiz w1, w22, #20, #1
; VBITS_GE_512-NEXT: orr w11, w12, w11
; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: fmov w26, s19
; VBITS_GE_512-NEXT: orr w3, w3, w4
; VBITS_GE_512-NEXT: orr w11, w11, w13
; VBITS_GE_512-NEXT: ubfiz w12, w16, #12, #1
; VBITS_GE_512-NEXT: ubfiz w13, w23, #21, #1
; VBITS_GE_512-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_512-NEXT: fmov w27, s20
; VBITS_GE_512-NEXT: orr w10, w3, w1
; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_512-NEXT: ubfiz w9, w17, #13, #1
; VBITS_GE_512-NEXT: ubfiz w16, w24, #22, #1
; VBITS_GE_512-NEXT: umov w8, v0.b[6]
; VBITS_GE_512-NEXT: umov w15, v0.b[15]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w5, s21
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: orr w10, w10, w13
; VBITS_GE_512-NEXT: ubfiz w12, w25, #23, #1
; VBITS_GE_512-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: orr w10, w10, w16
; VBITS_GE_512-NEXT: ubfiz w11, w26, #24, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_512-NEXT: fmov w28, s22
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_512-NEXT: fmov w7, s3
; VBITS_GE_512-NEXT: fmov w29, s23
; VBITS_GE_512-NEXT: orr w10, w10, w11
; VBITS_GE_512-NEXT: ubfiz w14, w5, #26, #1
; VBITS_GE_512-NEXT: fmov w19, s4
; VBITS_GE_512-NEXT: fmov w30, s24
; VBITS_GE_512-NEXT: ubfiz w11, w15, #15, #1
; VBITS_GE_512-NEXT: bfi w6, w8, #6, #1
; VBITS_GE_512-NEXT: orr w8, w9, w12
; VBITS_GE_512-NEXT: orr w9, w10, w13
; VBITS_GE_512-NEXT: orr w9, w9, w14
; VBITS_GE_512-NEXT: ubfiz w10, w28, #27, #1
; VBITS_GE_512-NEXT: fmov w14, s2
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: ubfiz w11, w7, #16, #1
; VBITS_GE_512-NEXT: ubfiz w13, w29, #28, #1
; VBITS_GE_512-NEXT: ubfiz w12, w19, #17, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w30, #29, #1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w11, w14, #30, #1
; VBITS_GE_512-NEXT: orr w8, w8, w12
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w8, w6, w8
; VBITS_GE_512-NEXT: orr w9, w9, w11
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB19_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rb { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #1
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB19_3
; VBITS_GE_512-NEXT: b .LBB19_4
; VBITS_GE_512-NEXT: .LBB19_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI19_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI19_0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB19_4
; VBITS_GE_512-NEXT: .LBB19_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: .LBB19_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB19_36
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB19_37
; VBITS_GE_512-NEXT: .LBB19_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB19_38
; VBITS_GE_512-NEXT: .LBB19_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB19_39
; VBITS_GE_512-NEXT: .LBB19_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB19_40
; VBITS_GE_512-NEXT: .LBB19_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB19_41
; VBITS_GE_512-NEXT: .LBB19_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB19_42
; VBITS_GE_512-NEXT: .LBB19_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB19_43
; VBITS_GE_512-NEXT: .LBB19_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB19_44
; VBITS_GE_512-NEXT: .LBB19_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB19_45
; VBITS_GE_512-NEXT: .LBB19_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB19_46
; VBITS_GE_512-NEXT: .LBB19_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB19_47
; VBITS_GE_512-NEXT: .LBB19_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB19_48
; VBITS_GE_512-NEXT: .LBB19_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB19_49
; VBITS_GE_512-NEXT: .LBB19_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB19_50
; VBITS_GE_512-NEXT: .LBB19_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB19_51
; VBITS_GE_512-NEXT: .LBB19_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB19_52
; VBITS_GE_512-NEXT: .LBB19_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB19_53
; VBITS_GE_512-NEXT: .LBB19_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB19_54
; VBITS_GE_512-NEXT: .LBB19_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB19_55
; VBITS_GE_512-NEXT: .LBB19_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB19_56
; VBITS_GE_512-NEXT: .LBB19_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB19_57
; VBITS_GE_512-NEXT: .LBB19_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB19_58
; VBITS_GE_512-NEXT: .LBB19_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB19_59
; VBITS_GE_512-NEXT: .LBB19_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB19_60
; VBITS_GE_512-NEXT: .LBB19_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB19_61
; VBITS_GE_512-NEXT: .LBB19_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB19_62
; VBITS_GE_512-NEXT: .LBB19_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB19_63
; VBITS_GE_512-NEXT: .LBB19_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB19_64
; VBITS_GE_512-NEXT: .LBB19_33: // %else118
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB19_35
; VBITS_GE_512-NEXT: .LBB19_34: // %cond.load121
; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w8
; VBITS_GE_512-NEXT: ldrb w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w8
; VBITS_GE_512-NEXT: .LBB19_35: // %else122
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB19_36: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB19_6
; VBITS_GE_512-NEXT: .LBB19_37: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB19_7
; VBITS_GE_512-NEXT: .LBB19_38: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB19_8
; VBITS_GE_512-NEXT: .LBB19_39: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB19_9
; VBITS_GE_512-NEXT: .LBB19_40: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB19_10
; VBITS_GE_512-NEXT: .LBB19_41: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB19_11
; VBITS_GE_512-NEXT: .LBB19_42: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB19_12
; VBITS_GE_512-NEXT: .LBB19_43: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB19_13
; VBITS_GE_512-NEXT: .LBB19_44: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB19_14
; VBITS_GE_512-NEXT: .LBB19_45: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB19_15
; VBITS_GE_512-NEXT: .LBB19_46: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB19_16
; VBITS_GE_512-NEXT: .LBB19_47: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB19_17
; VBITS_GE_512-NEXT: .LBB19_48: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB19_18
; VBITS_GE_512-NEXT: .LBB19_49: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB19_19
; VBITS_GE_512-NEXT: .LBB19_50: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB19_20
; VBITS_GE_512-NEXT: .LBB19_51: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB19_21
; VBITS_GE_512-NEXT: .LBB19_52: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB19_22
; VBITS_GE_512-NEXT: .LBB19_53: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB19_23
; VBITS_GE_512-NEXT: .LBB19_54: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB19_24
; VBITS_GE_512-NEXT: .LBB19_55: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB19_25
; VBITS_GE_512-NEXT: .LBB19_56: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB19_26
; VBITS_GE_512-NEXT: .LBB19_57: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB19_27
; VBITS_GE_512-NEXT: .LBB19_58: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB19_28
; VBITS_GE_512-NEXT: .LBB19_59: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB19_29
; VBITS_GE_512-NEXT: .LBB19_60: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB19_30
; VBITS_GE_512-NEXT: .LBB19_61: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB19_31
; VBITS_GE_512-NEXT: .LBB19_62: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB19_32
; VBITS_GE_512-NEXT: .LBB19_63: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB19_33
; VBITS_GE_512-NEXT: .LBB19_64: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB19_34
; VBITS_GE_512-NEXT: b .LBB19_35
;
; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl32
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr %c
ret void
}
; Expanding load of <16 x i8> from %ap, zero-extended to <16 x i32> and stored
; to %c. The lane mask is (load <16 x i8> from %bp) == 0; the passthru is poison.
;
; What the three generated check blocks below demonstrate
;  * VBITS_GE_256 and VBITS_GE_512 blocks - the mask is reduced to a scalar in
;    w8 (bits 0-15 are tested) and each lane is loaded under its own tbz/tbnz
;    guard. The two blocks differ in the %else58 tail, where the 256-bit run
;    splits the widened result into two vl8 st1w stores and the 512-bit run
;    uses a single vl16 st1w.
;  * CHECK-EXPAND block - no per-lane branches; one ld1b under a cntp/whilelo
;    predicate followed by an expand instruction, then the same split store as
;    the 256-bit run.
;
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
;
; NOTE(review) - the name says "masked_load" while the body calls
; llvm.masked.expandload; presumably the name was carried over from the plain
; masked-load test. Renaming would change the generated label checks, so
; confirm before touching it.
define void @masked_load_zext_v16i8i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI20_0
; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0
; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB20_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB20_3
; VBITS_GE_256-NEXT: b .LBB20_4
; VBITS_GE_256-NEXT: .LBB20_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB20_4
; VBITS_GE_256-NEXT: .LBB20_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB20_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB20_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB20_21
; VBITS_GE_256-NEXT: .LBB20_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB20_22
; VBITS_GE_256-NEXT: .LBB20_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB20_23
; VBITS_GE_256-NEXT: .LBB20_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB20_24
; VBITS_GE_256-NEXT: .LBB20_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB20_25
; VBITS_GE_256-NEXT: .LBB20_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB20_26
; VBITS_GE_256-NEXT: .LBB20_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB20_27
; VBITS_GE_256-NEXT: .LBB20_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB20_28
; VBITS_GE_256-NEXT: .LBB20_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB20_29
; VBITS_GE_256-NEXT: .LBB20_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB20_30
; VBITS_GE_256-NEXT: .LBB20_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB20_31
; VBITS_GE_256-NEXT: .LBB20_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB20_32
; VBITS_GE_256-NEXT: .LBB20_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB20_19
; VBITS_GE_256-NEXT: .LBB20_18: // %cond.load57
; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_256-NEXT: .LBB20_19: // %else58
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB20_20: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB20_6
; VBITS_GE_256-NEXT: .LBB20_21: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB20_7
; VBITS_GE_256-NEXT: .LBB20_22: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB20_8
; VBITS_GE_256-NEXT: .LBB20_23: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB20_9
; VBITS_GE_256-NEXT: .LBB20_24: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB20_10
; VBITS_GE_256-NEXT: .LBB20_25: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB20_11
; VBITS_GE_256-NEXT: .LBB20_26: // %cond.load29
; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB20_12
; VBITS_GE_256-NEXT: .LBB20_27: // %cond.load33
; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB20_13
; VBITS_GE_256-NEXT: .LBB20_28: // %cond.load37
; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB20_14
; VBITS_GE_256-NEXT: .LBB20_29: // %cond.load41
; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB20_15
; VBITS_GE_256-NEXT: .LBB20_30: // %cond.load45
; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB20_16
; VBITS_GE_256-NEXT: .LBB20_31: // %cond.load49
; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB20_17
; VBITS_GE_256-NEXT: .LBB20_32: // %cond.load53
; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB20_18
; VBITS_GE_256-NEXT: b .LBB20_19
;
; VBITS_GE_512-LABEL: masked_load_zext_v16i8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI20_0
; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
; VBITS_GE_512-NEXT: cmeq v0.16b, v0.16b, #0
; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_512-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: addv h0, v0.8h
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB20_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB20_3
; VBITS_GE_512-NEXT: b .LBB20_4
; VBITS_GE_512-NEXT: .LBB20_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB20_4
; VBITS_GE_512-NEXT: .LBB20_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB20_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB20_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB20_21
; VBITS_GE_512-NEXT: .LBB20_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB20_22
; VBITS_GE_512-NEXT: .LBB20_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB20_23
; VBITS_GE_512-NEXT: .LBB20_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB20_24
; VBITS_GE_512-NEXT: .LBB20_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB20_25
; VBITS_GE_512-NEXT: .LBB20_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB20_26
; VBITS_GE_512-NEXT: .LBB20_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB20_27
; VBITS_GE_512-NEXT: .LBB20_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB20_28
; VBITS_GE_512-NEXT: .LBB20_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB20_29
; VBITS_GE_512-NEXT: .LBB20_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB20_30
; VBITS_GE_512-NEXT: .LBB20_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB20_31
; VBITS_GE_512-NEXT: .LBB20_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB20_32
; VBITS_GE_512-NEXT: .LBB20_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB20_19
; VBITS_GE_512-NEXT: .LBB20_18: // %cond.load57
; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_512-NEXT: .LBB20_19: // %else58
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB20_20: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB20_6
; VBITS_GE_512-NEXT: .LBB20_21: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB20_7
; VBITS_GE_512-NEXT: .LBB20_22: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB20_8
; VBITS_GE_512-NEXT: .LBB20_23: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB20_9
; VBITS_GE_512-NEXT: .LBB20_24: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB20_10
; VBITS_GE_512-NEXT: .LBB20_25: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB20_11
; VBITS_GE_512-NEXT: .LBB20_26: // %cond.load29
; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB20_12
; VBITS_GE_512-NEXT: .LBB20_27: // %cond.load33
; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB20_13
; VBITS_GE_512-NEXT: .LBB20_28: // %cond.load37
; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB20_14
; VBITS_GE_512-NEXT: .LBB20_29: // %cond.load41
; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB20_15
; VBITS_GE_512-NEXT: .LBB20_30: // %cond.load45
; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB20_16
; VBITS_GE_512-NEXT: .LBB20_31: // %cond.load49
; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB20_17
; VBITS_GE_512-NEXT: .LBB20_32: // %cond.load53
; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB20_18
; VBITS_GE_512-NEXT: b .LBB20_19
;
; CHECK-EXPAND-LABEL: masked_load_zext_v16i8i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl16
; CHECK-EXPAND-NEXT: ldr q0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
; IR under test - a lane is active when the matching byte of %bp is zero.
%b = load <16 x i8>, ptr %bp
%mask = icmp eq <16 x i8> %b, zeroinitializer
; Expanding load from %ap under %mask; inactive lanes take the poison passthru.
%load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison)
; Widen i8 -> i32 and store all 16 lanes to %c.
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i8> from %ap, zero-extended to <8 x i64> and stored
; to %c. The lane mask is (load <8 x i8> from %bp) == 0; the passthru is poison.
;
; What the three generated check blocks below demonstrate
;  * VBITS_GE_256 and VBITS_GE_512 blocks - the mask is reduced to a scalar in
;    w8 (bits 0-7 are tested) and each lane is loaded under its own tbz/tbnz
;    guard. The two blocks differ in the %else26 tail, where the 256-bit run
;    splits the widened result into two vl4 st1d stores and the 512-bit run
;    uses a single vl8 st1d.
;  * CHECK-EXPAND block - no per-lane branches; one ld1b under a cntp/whilelo
;    predicate followed by an expand instruction, then the same split store as
;    the 256-bit run.
;
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script instead of editing them by hand.
;
; NOTE(review) - the name says "masked_load" while the body calls
; llvm.masked.expandload; presumably the name was carried over from the plain
; masked-load test. Renaming would change the generated label checks, so
; confirm before touching it.
define void @masked_load_zext_v8i8i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr d0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI21_0
; VBITS_GE_256-NEXT: ldr d1, [x8, :lo12:.LCPI21_0]
; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0
; VBITS_GE_256-NEXT: and v0.8b, v0.8b, v1.8b
; VBITS_GE_256-NEXT: addv b0, v0.8b
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB21_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB21_3
; VBITS_GE_256-NEXT: b .LBB21_4
; VBITS_GE_256-NEXT: .LBB21_2:
; VBITS_GE_256-NEXT: // implicit-def: $d0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB21_4
; VBITS_GE_256-NEXT: .LBB21_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB21_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB21_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB21_13
; VBITS_GE_256-NEXT: .LBB21_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB21_14
; VBITS_GE_256-NEXT: .LBB21_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB21_15
; VBITS_GE_256-NEXT: .LBB21_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB21_16
; VBITS_GE_256-NEXT: .LBB21_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB21_11
; VBITS_GE_256-NEXT: .LBB21_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_256-NEXT: .LBB21_11: // %else26
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB21_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB21_6
; VBITS_GE_256-NEXT: .LBB21_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB21_7
; VBITS_GE_256-NEXT: .LBB21_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB21_8
; VBITS_GE_256-NEXT: .LBB21_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB21_9
; VBITS_GE_256-NEXT: .LBB21_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB21_10
; VBITS_GE_256-NEXT: b .LBB21_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr d0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI21_0
; VBITS_GE_512-NEXT: ldr d1, [x8, :lo12:.LCPI21_0]
; VBITS_GE_512-NEXT: cmeq v0.8b, v0.8b, #0
; VBITS_GE_512-NEXT: and v0.8b, v0.8b, v1.8b
; VBITS_GE_512-NEXT: addv b0, v0.8b
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB21_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB21_3
; VBITS_GE_512-NEXT: b .LBB21_4
; VBITS_GE_512-NEXT: .LBB21_2:
; VBITS_GE_512-NEXT: // implicit-def: $d0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB21_4
; VBITS_GE_512-NEXT: .LBB21_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB21_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB21_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB21_13
; VBITS_GE_512-NEXT: .LBB21_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB21_14
; VBITS_GE_512-NEXT: .LBB21_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB21_15
; VBITS_GE_512-NEXT: .LBB21_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB21_16
; VBITS_GE_512-NEXT: .LBB21_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB21_11
; VBITS_GE_512-NEXT: .LBB21_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_512-NEXT: .LBB21_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB21_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB21_6
; VBITS_GE_512-NEXT: .LBB21_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB21_7
; VBITS_GE_512-NEXT: .LBB21_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB21_8
; VBITS_GE_512-NEXT: .LBB21_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB21_9
; VBITS_GE_512-NEXT: .LBB21_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB21_10
; VBITS_GE_512-NEXT: b .LBB21_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.b, vl8
; CHECK-EXPAND-NEXT: ldr d0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.b
; CHECK-EXPAND-NEXT: whilelo p0.b, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.b, p1, z0.b
; CHECK-EXPAND-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; IR under test - a lane is active when the matching byte of %bp is zero.
%b = load <8 x i8>, ptr %bp
%mask = icmp eq <8 x i8> %b, zeroinitializer
; Expanding load from %ap under %mask; inactive lanes take the poison passthru.
%load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison)
; Widen i8 -> i64 and store all 8 lanes to %c.
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v16i16i32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; VBITS_GE_256-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: umov w11, v0.b[7]
; VBITS_GE_256-NEXT: umov w12, v0.b[8]
; VBITS_GE_256-NEXT: umov w13, v0.b[3]
; VBITS_GE_256-NEXT: umov w14, v0.b[4]
; VBITS_GE_256-NEXT: umov w15, v0.b[10]
; VBITS_GE_256-NEXT: umov w16, v0.b[5]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[9]
; VBITS_GE_256-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_256-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[11]
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[13]
; VBITS_GE_256-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[12]
; VBITS_GE_256-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_256-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[14]
; VBITS_GE_256-NEXT: orr w9, w11, w9
; VBITS_GE_256-NEXT: umov w11, v0.b[6]
; VBITS_GE_256-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_256-NEXT: orr w9, w9, w15
; VBITS_GE_256-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_256-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[15]
; VBITS_GE_256-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_256-NEXT: and w8, w9, #0xffff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB22_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rh { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #2
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB22_3
; VBITS_GE_256-NEXT: b .LBB22_4
; VBITS_GE_256-NEXT: .LBB22_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI22_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI22_0
; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB22_4
; VBITS_GE_256-NEXT: .LBB22_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: .LBB22_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB22_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB22_21
; VBITS_GE_256-NEXT: .LBB22_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB22_22
; VBITS_GE_256-NEXT: .LBB22_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB22_23
; VBITS_GE_256-NEXT: .LBB22_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB22_24
; VBITS_GE_256-NEXT: .LBB22_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB22_25
; VBITS_GE_256-NEXT: .LBB22_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB22_26
; VBITS_GE_256-NEXT: .LBB22_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB22_27
; VBITS_GE_256-NEXT: .LBB22_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB22_28
; VBITS_GE_256-NEXT: .LBB22_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB22_29
; VBITS_GE_256-NEXT: .LBB22_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB22_30
; VBITS_GE_256-NEXT: .LBB22_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB22_31
; VBITS_GE_256-NEXT: .LBB22_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB22_32
; VBITS_GE_256-NEXT: .LBB22_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB22_19
; VBITS_GE_256-NEXT: .LBB22_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w8
; VBITS_GE_256-NEXT: ldrh w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w8
; VBITS_GE_256-NEXT: .LBB22_19: // %else58
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB22_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB22_6
; VBITS_GE_256-NEXT: .LBB22_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB22_7
; VBITS_GE_256-NEXT: .LBB22_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB22_8
; VBITS_GE_256-NEXT: .LBB22_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB22_9
; VBITS_GE_256-NEXT: .LBB22_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB22_10
; VBITS_GE_256-NEXT: .LBB22_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB22_11
; VBITS_GE_256-NEXT: .LBB22_26: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB22_12
; VBITS_GE_256-NEXT: .LBB22_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB22_13
; VBITS_GE_256-NEXT: .LBB22_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB22_14
; VBITS_GE_256-NEXT: .LBB22_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB22_15
; VBITS_GE_256-NEXT: .LBB22_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB22_16
; VBITS_GE_256-NEXT: .LBB22_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB22_17
; VBITS_GE_256-NEXT: .LBB22_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB22_18
; VBITS_GE_256-NEXT: b .LBB22_19
;
; VBITS_GE_512-LABEL: masked_load_zext_v16i16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.h, vl16
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; VBITS_GE_512-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB22_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rh { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #2
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB22_3
; VBITS_GE_512-NEXT: b .LBB22_4
; VBITS_GE_512-NEXT: .LBB22_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI22_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI22_0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB22_4
; VBITS_GE_512-NEXT: .LBB22_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: .LBB22_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB22_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB22_21
; VBITS_GE_512-NEXT: .LBB22_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB22_22
; VBITS_GE_512-NEXT: .LBB22_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB22_23
; VBITS_GE_512-NEXT: .LBB22_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB22_24
; VBITS_GE_512-NEXT: .LBB22_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB22_25
; VBITS_GE_512-NEXT: .LBB22_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB22_26
; VBITS_GE_512-NEXT: .LBB22_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB22_27
; VBITS_GE_512-NEXT: .LBB22_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB22_28
; VBITS_GE_512-NEXT: .LBB22_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB22_29
; VBITS_GE_512-NEXT: .LBB22_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB22_30
; VBITS_GE_512-NEXT: .LBB22_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB22_31
; VBITS_GE_512-NEXT: .LBB22_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB22_32
; VBITS_GE_512-NEXT: .LBB22_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB22_19
; VBITS_GE_512-NEXT: .LBB22_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w8
; VBITS_GE_512-NEXT: ldrh w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w8
; VBITS_GE_512-NEXT: .LBB22_19: // %else58
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB22_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB22_6
; VBITS_GE_512-NEXT: .LBB22_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB22_7
; VBITS_GE_512-NEXT: .LBB22_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB22_8
; VBITS_GE_512-NEXT: .LBB22_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB22_9
; VBITS_GE_512-NEXT: .LBB22_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB22_10
; VBITS_GE_512-NEXT: .LBB22_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB22_11
; VBITS_GE_512-NEXT: .LBB22_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB22_12
; VBITS_GE_512-NEXT: .LBB22_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB22_13
; VBITS_GE_512-NEXT: .LBB22_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB22_14
; VBITS_GE_512-NEXT: .LBB22_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB22_15
; VBITS_GE_512-NEXT: .LBB22_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB22_16
; VBITS_GE_512-NEXT: .LBB22_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB22_17
; VBITS_GE_512-NEXT: .LBB22_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB22_18
; VBITS_GE_512-NEXT: b .LBB22_19
;
; CHECK-EXPAND-LABEL: masked_load_zext_v16i16i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
%b = load <16 x i16>, ptr %bp
%mask = icmp eq <16 x i16> %b, zeroinitializer
%load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison)
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i16> from %ap, zero-extended to <8 x i64> and stored
; to %c. A lane is active when the matching i16 element loaded from %bp is
; zero; the passthru operand of the expandload is poison.
;
; Expected codegen, keyed by the RUN lines at the top of the file:
;  * VBITS_GE_256 and VBITS_GE_512 prefixes (no sve2p2): the expandload is
;    scalarised. The compare result is reduced to a bitfield in w8 and each
;    set bit takes a cond.load block that loads one element through x0,
;    advancing the pointer between lanes.
;  * CHECK-EXPAND prefix (sve2p2, 256-bit minimum): cntp + whilelo build a
;    predicate covering as many leading lanes as there are active mask bits,
;    a single contiguous ld1h reads them, and expand places them under the mask.
; With a 256-bit minimum the i64 result is written as two vl4 halves, while
; the VBITS_GE_512 output uses a single vl8 store.
; The assertions below are autogenerated; regenerate them rather than
; editing by hand.
define void @masked_load_zext_v8i16i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI23_0
; VBITS_GE_256-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
; VBITS_GE_256-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB23_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB23_3
; VBITS_GE_256-NEXT: b .LBB23_4
; VBITS_GE_256-NEXT: .LBB23_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB23_4
; VBITS_GE_256-NEXT: .LBB23_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_256-NEXT: .LBB23_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB23_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB23_13
; VBITS_GE_256-NEXT: .LBB23_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB23_14
; VBITS_GE_256-NEXT: .LBB23_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB23_15
; VBITS_GE_256-NEXT: .LBB23_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB23_16
; VBITS_GE_256-NEXT: .LBB23_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB23_11
; VBITS_GE_256-NEXT: .LBB23_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_256-NEXT: .LBB23_11: // %else26
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB23_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB23_6
; VBITS_GE_256-NEXT: .LBB23_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB23_7
; VBITS_GE_256-NEXT: .LBB23_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB23_8
; VBITS_GE_256-NEXT: .LBB23_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB23_9
; VBITS_GE_256-NEXT: .LBB23_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB23_10
; VBITS_GE_256-NEXT: b .LBB23_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i16i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x1]
; VBITS_GE_512-NEXT: adrp x8, .LCPI23_0
; VBITS_GE_512-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
; VBITS_GE_512-NEXT: cmeq v0.8h, v0.8h, #0
; VBITS_GE_512-NEXT: and v0.16b, v0.16b, v1.16b
; VBITS_GE_512-NEXT: addv h0, v0.8h
; VBITS_GE_512-NEXT: fmov w8, s0
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB23_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB23_3
; VBITS_GE_512-NEXT: b .LBB23_4
; VBITS_GE_512-NEXT: .LBB23_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB23_4
; VBITS_GE_512-NEXT: .LBB23_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_512-NEXT: .LBB23_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB23_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB23_13
; VBITS_GE_512-NEXT: .LBB23_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB23_14
; VBITS_GE_512-NEXT: .LBB23_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB23_15
; VBITS_GE_512-NEXT: .LBB23_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB23_16
; VBITS_GE_512-NEXT: .LBB23_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB23_11
; VBITS_GE_512-NEXT: .LBB23_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_512-NEXT: .LBB23_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB23_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB23_6
; VBITS_GE_512-NEXT: .LBB23_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB23_7
; VBITS_GE_512-NEXT: .LBB23_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB23_8
; VBITS_GE_512-NEXT: .LBB23_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB23_9
; VBITS_GE_512-NEXT: .LBB23_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB23_10
; VBITS_GE_512-NEXT: b .LBB23_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i16i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl8
; CHECK-EXPAND-NEXT: ldr q0, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p0.h, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Mask lane i is set when element i of the vector at %bp equals zero.
%b = load <8 x i16>, ptr %bp
%mask = icmp eq <8 x i16> %b, zeroinitializer
; Expanding load with a poison passthru, then widen i16 -> i64 and store.
%load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison)
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
; Expanding load of <8 x i32> from %ap, zero-extended to <8 x i64> and stored
; to %c. A lane is active when the matching i32 element loaded from %bp is
; zero; the passthru operand of the expandload is poison.
;
; Expected codegen, keyed by the RUN lines at the top of the file:
;  * VBITS_GE_256 and VBITS_GE_512 prefixes (no sve2p2): the expandload is
;    scalarised. The predicate is materialised as bytes and packed into an
;    8-bit field in w8 (umov/bfi/orr). Lane 0 is filled with a broadcasting
;    ld1rw; every later active lane loads a scalar through x0 and inserts it
;    with a one-lane predicate built from index + cmpeq. When lane 0 is
;    inactive, z0 is initialised from constant-pool entry .LCPI24_0 instead.
;  * CHECK-EXPAND prefix (sve2p2, 256-bit minimum): cntp + whilelo build a
;    predicate covering as many leading lanes as there are active mask bits,
;    a single contiguous ld1w reads them, and expand places them under the mask.
; With a 256-bit minimum the i64 result is written as two vl4 halves, while
; the VBITS_GE_512 output uses a single vl8 store.
; NOTE(review): the non-sve2p2 sequences adjust sp by 16 on entry and exit
; although no stack access appears in the checked output - presumably an
; unused stack slot; confirm whether that is expected.
; The assertions below are autogenerated; regenerate them rather than
; editing by hand.
define void @masked_load_zext_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB24_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB24_3
; VBITS_GE_256-NEXT: b .LBB24_4
; VBITS_GE_256-NEXT: .LBB24_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI24_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI24_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB24_4
; VBITS_GE_256-NEXT: .LBB24_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: .LBB24_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB24_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB24_13
; VBITS_GE_256-NEXT: .LBB24_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB24_14
; VBITS_GE_256-NEXT: .LBB24_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB24_15
; VBITS_GE_256-NEXT: .LBB24_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB24_16
; VBITS_GE_256-NEXT: .LBB24_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB24_11
; VBITS_GE_256-NEXT: .LBB24_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_256-NEXT: .LBB24_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB24_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB24_6
; VBITS_GE_256-NEXT: .LBB24_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB24_7
; VBITS_GE_256-NEXT: .LBB24_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB24_8
; VBITS_GE_256-NEXT: .LBB24_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB24_9
; VBITS_GE_256-NEXT: .LBB24_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB24_10
; VBITS_GE_256-NEXT: b .LBB24_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i32i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB24_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB24_3
; VBITS_GE_512-NEXT: b .LBB24_4
; VBITS_GE_512-NEXT: .LBB24_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI24_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI24_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB24_4
; VBITS_GE_512-NEXT: .LBB24_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: .LBB24_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB24_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB24_13
; VBITS_GE_512-NEXT: .LBB24_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB24_14
; VBITS_GE_512-NEXT: .LBB24_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB24_15
; VBITS_GE_512-NEXT: .LBB24_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB24_16
; VBITS_GE_512-NEXT: .LBB24_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB24_11
; VBITS_GE_512-NEXT: .LBB24_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_512-NEXT: .LBB24_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB24_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB24_6
; VBITS_GE_512-NEXT: .LBB24_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB24_7
; VBITS_GE_512-NEXT: .LBB24_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB24_8
; VBITS_GE_512-NEXT: .LBB24_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB24_9
; VBITS_GE_512-NEXT: .LBB24_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB24_10
; VBITS_GE_512-NEXT: b .LBB24_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Mask lane i is set when element i of the vector at %bp equals zero.
%b = load <8 x i32>, ptr %bp
%mask = icmp eq <8 x i32> %b, zeroinitializer
; Expanding load with a poison passthru, then widen i32 -> i64 and store.
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16_m16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b
; VBITS_GE_256-NEXT: umov w8, v1.b[0]
; VBITS_GE_256-NEXT: umov w11, v0.b[3]
; VBITS_GE_256-NEXT: umov w12, v0.b[4]
; VBITS_GE_256-NEXT: umov w13, v1.b[1]
; VBITS_GE_256-NEXT: umov w9, v1.b[7]
; VBITS_GE_256-NEXT: umov w10, v1.b[8]
; VBITS_GE_256-NEXT: umov w16, v1.b[9]
; VBITS_GE_256-NEXT: umov w17, v1.b[10]
; VBITS_GE_256-NEXT: umov w18, v0.b[5]
; VBITS_GE_256-NEXT: umov w14, v1.b[2]
; VBITS_GE_256-NEXT: umov w15, v1.b[3]
; VBITS_GE_256-NEXT: umov w1, v1.b[4]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1
; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[6]
; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1
; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v1.b[11]
; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1
; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[7]
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[12]
; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[8]
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v1.b[13]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: umov w13, v1.b[14]
; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1
; VBITS_GE_256-NEXT: umov w15, v0.b[9]
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[10]
; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1
; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1
; VBITS_GE_256-NEXT: umov w10, v1.b[5]
; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1
; VBITS_GE_256-NEXT: orr w11, w11, w14
; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[15]
; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[0]
; VBITS_GE_256-NEXT: umov w1, v0.b[11]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v0.b[1]
; VBITS_GE_256-NEXT: orr w11, w11, w15
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: umov w13, v0.b[12]
; VBITS_GE_256-NEXT: umov w14, v1.b[6]
; VBITS_GE_256-NEXT: umov w15, v0.b[2]
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[13]
; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1
; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[14]
; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1
; VBITS_GE_256-NEXT: orr w11, w11, w1
; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1
; VBITS_GE_256-NEXT: orr w9, w9, w18
; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[15]
; VBITS_GE_256-NEXT: orr w9, w9, w14
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w9, w11, w10
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB25_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rb { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #1
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB25_3
; VBITS_GE_256-NEXT: b .LBB25_4
; VBITS_GE_256-NEXT: .LBB25_2:
; VBITS_GE_256-NEXT: ptrue p2.b, vl32
; VBITS_GE_256-NEXT: adrp x9, .LCPI25_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI25_0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB25_4
; VBITS_GE_256-NEXT: .LBB25_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: .LBB25_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB25_36
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB25_37
; VBITS_GE_256-NEXT: .LBB25_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB25_38
; VBITS_GE_256-NEXT: .LBB25_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB25_39
; VBITS_GE_256-NEXT: .LBB25_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB25_40
; VBITS_GE_256-NEXT: .LBB25_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB25_41
; VBITS_GE_256-NEXT: .LBB25_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB25_42
; VBITS_GE_256-NEXT: .LBB25_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB25_43
; VBITS_GE_256-NEXT: .LBB25_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB25_44
; VBITS_GE_256-NEXT: .LBB25_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB25_45
; VBITS_GE_256-NEXT: .LBB25_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB25_46
; VBITS_GE_256-NEXT: .LBB25_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB25_47
; VBITS_GE_256-NEXT: .LBB25_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB25_48
; VBITS_GE_256-NEXT: .LBB25_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB25_49
; VBITS_GE_256-NEXT: .LBB25_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB25_50
; VBITS_GE_256-NEXT: .LBB25_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB25_51
; VBITS_GE_256-NEXT: .LBB25_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB25_52
; VBITS_GE_256-NEXT: .LBB25_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB25_53
; VBITS_GE_256-NEXT: .LBB25_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB25_54
; VBITS_GE_256-NEXT: .LBB25_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB25_55
; VBITS_GE_256-NEXT: .LBB25_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB25_56
; VBITS_GE_256-NEXT: .LBB25_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB25_57
; VBITS_GE_256-NEXT: .LBB25_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB25_58
; VBITS_GE_256-NEXT: .LBB25_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB25_59
; VBITS_GE_256-NEXT: .LBB25_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB25_60
; VBITS_GE_256-NEXT: .LBB25_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB25_61
; VBITS_GE_256-NEXT: .LBB25_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB25_62
; VBITS_GE_256-NEXT: .LBB25_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB25_63
; VBITS_GE_256-NEXT: .LBB25_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB25_64
; VBITS_GE_256-NEXT: .LBB25_33: // %else118
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB25_35
; VBITS_GE_256-NEXT: .LBB25_34: // %cond.load121
; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w8
; VBITS_GE_256-NEXT: ldrb w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w8
; VBITS_GE_256-NEXT: .LBB25_35: // %else122
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB25_36: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB25_6
; VBITS_GE_256-NEXT: .LBB25_37: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB25_7
; VBITS_GE_256-NEXT: .LBB25_38: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB25_8
; VBITS_GE_256-NEXT: .LBB25_39: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB25_9
; VBITS_GE_256-NEXT: .LBB25_40: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB25_10
; VBITS_GE_256-NEXT: .LBB25_41: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB25_11
; VBITS_GE_256-NEXT: .LBB25_42: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB25_12
; VBITS_GE_256-NEXT: .LBB25_43: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB25_13
; VBITS_GE_256-NEXT: .LBB25_44: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB25_14
; VBITS_GE_256-NEXT: .LBB25_45: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB25_15
; VBITS_GE_256-NEXT: .LBB25_46: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB25_16
; VBITS_GE_256-NEXT: .LBB25_47: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB25_17
; VBITS_GE_256-NEXT: .LBB25_48: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB25_18
; VBITS_GE_256-NEXT: .LBB25_49: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB25_19
; VBITS_GE_256-NEXT: .LBB25_50: // %cond.load61
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB25_20
; VBITS_GE_256-NEXT: .LBB25_51: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB25_21
; VBITS_GE_256-NEXT: .LBB25_52: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB25_22
; VBITS_GE_256-NEXT: .LBB25_53: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB25_23
; VBITS_GE_256-NEXT: .LBB25_54: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB25_24
; VBITS_GE_256-NEXT: .LBB25_55: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB25_25
; VBITS_GE_256-NEXT: .LBB25_56: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB25_26
; VBITS_GE_256-NEXT: .LBB25_57: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB25_27
; VBITS_GE_256-NEXT: .LBB25_58: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB25_28
; VBITS_GE_256-NEXT: .LBB25_59: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB25_29
; VBITS_GE_256-NEXT: .LBB25_60: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB25_30
; VBITS_GE_256-NEXT: .LBB25_61: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB25_31
; VBITS_GE_256-NEXT: .LBB25_62: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB25_32
; VBITS_GE_256-NEXT: .LBB25_63: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB25_33
; VBITS_GE_256-NEXT: .LBB25_64: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB25_34
; VBITS_GE_256-NEXT: b .LBB25_35
;
; VBITS_GE_512-LABEL: masked_load_sext_v32i8i16_m16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: str x2, [sp] // 8-byte Spill
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.b
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w12, v0.b[1]
; VBITS_GE_512-NEXT: fmov w6, s0
; VBITS_GE_512-NEXT: umov w3, v0.b[7]
; VBITS_GE_512-NEXT: umov w5, v0.b[8]
; VBITS_GE_512-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_512-NEXT: umov w13, v0.b[2]
; VBITS_GE_512-NEXT: umov w4, v0.b[9]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_512-NEXT: umov w1, v0.b[10]
; VBITS_GE_512-NEXT: and w6, w6, #0x1
; VBITS_GE_512-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1
; VBITS_GE_512-NEXT: umov w11, v0.b[3]
; VBITS_GE_512-NEXT: umov w16, v0.b[11]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_512-NEXT: fmov w22, s7
; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1
; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1
; VBITS_GE_512-NEXT: umov w17, v0.b[12]
; VBITS_GE_512-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1
; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[13]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w23, s16
; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1
; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1
; VBITS_GE_512-NEXT: orr w12, w12, w3
; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w24, s17
; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1
; VBITS_GE_512-NEXT: orr w12, w12, w13
; VBITS_GE_512-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w25, s18
; VBITS_GE_512-NEXT: orr w3, w5, w20
; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_512-NEXT: orr w11, w12, w1
; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_512-NEXT: fmov w26, s19
; VBITS_GE_512-NEXT: orr w13, w3, w4
; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1
; VBITS_GE_512-NEXT: ubfiz w16, w17, #12, #1
; VBITS_GE_512-NEXT: fmov w27, s20
; VBITS_GE_512-NEXT: ubfiz w17, w24, #22, #1
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: ubfiz w12, w18, #13, #1
; VBITS_GE_512-NEXT: fmov w28, s21
; VBITS_GE_512-NEXT: orr w13, w13, w3
; VBITS_GE_512-NEXT: ubfiz w18, w25, #23, #1
; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_512-NEXT: orr w10, w11, w16
; VBITS_GE_512-NEXT: umov w15, v0.b[15]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w29, s22
; VBITS_GE_512-NEXT: orr w11, w13, w17
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: ubfiz w12, w26, #24, #1
; VBITS_GE_512-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_512-NEXT: orr w11, w11, w18
; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_512-NEXT: ubfiz w9, w14, #14, #1
; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1
; VBITS_GE_512-NEXT: fmov w7, s3
; VBITS_GE_512-NEXT: fmov w30, s23
; VBITS_GE_512-NEXT: orr w9, w10, w9
; VBITS_GE_512-NEXT: orr w10, w11, w13
; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1
; VBITS_GE_512-NEXT: umov w2, v0.b[6]
; VBITS_GE_512-NEXT: fmov w19, s4
; VBITS_GE_512-NEXT: fmov w8, s24
; VBITS_GE_512-NEXT: ubfiz w12, w15, #15, #1
; VBITS_GE_512-NEXT: orr w10, w10, w14
; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_512-NEXT: orr w10, w10, w11
; VBITS_GE_512-NEXT: fmov w11, s2
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1
; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1
; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1
; VBITS_GE_512-NEXT: bfi w6, w2, #6, #1
; VBITS_GE_512-NEXT: orr w10, w10, w14
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1
; VBITS_GE_512-NEXT: orr w8, w10, w8
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: orr w9, w6, w9
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: orr w8, w9, w8
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB25_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #1
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB25_3
; VBITS_GE_512-NEXT: b .LBB25_4
; VBITS_GE_512-NEXT: .LBB25_2:
; VBITS_GE_512-NEXT: ptrue p2.b, vl32
; VBITS_GE_512-NEXT: adrp x9, .LCPI25_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI25_0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB25_4
; VBITS_GE_512-NEXT: .LBB25_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: .LBB25_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB25_36
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB25_37
; VBITS_GE_512-NEXT: .LBB25_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB25_38
; VBITS_GE_512-NEXT: .LBB25_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB25_39
; VBITS_GE_512-NEXT: .LBB25_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB25_40
; VBITS_GE_512-NEXT: .LBB25_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB25_41
; VBITS_GE_512-NEXT: .LBB25_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB25_42
; VBITS_GE_512-NEXT: .LBB25_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB25_43
; VBITS_GE_512-NEXT: .LBB25_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB25_44
; VBITS_GE_512-NEXT: .LBB25_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB25_45
; VBITS_GE_512-NEXT: .LBB25_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB25_46
; VBITS_GE_512-NEXT: .LBB25_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB25_47
; VBITS_GE_512-NEXT: .LBB25_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB25_48
; VBITS_GE_512-NEXT: .LBB25_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB25_49
; VBITS_GE_512-NEXT: .LBB25_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB25_50
; VBITS_GE_512-NEXT: .LBB25_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB25_51
; VBITS_GE_512-NEXT: .LBB25_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB25_52
; VBITS_GE_512-NEXT: .LBB25_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB25_53
; VBITS_GE_512-NEXT: .LBB25_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB25_54
; VBITS_GE_512-NEXT: .LBB25_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB25_55
; VBITS_GE_512-NEXT: .LBB25_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB25_56
; VBITS_GE_512-NEXT: .LBB25_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB25_57
; VBITS_GE_512-NEXT: .LBB25_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB25_58
; VBITS_GE_512-NEXT: .LBB25_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB25_59
; VBITS_GE_512-NEXT: .LBB25_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB25_60
; VBITS_GE_512-NEXT: .LBB25_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB25_61
; VBITS_GE_512-NEXT: .LBB25_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB25_62
; VBITS_GE_512-NEXT: .LBB25_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB25_63
; VBITS_GE_512-NEXT: .LBB25_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB25_64
; VBITS_GE_512-NEXT: .LBB25_33: // %else118
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB25_35
; VBITS_GE_512-NEXT: .LBB25_34: // %cond.load121
; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w8
; VBITS_GE_512-NEXT: ldrb w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8
; VBITS_GE_512-NEXT: .LBB25_35: // %else122
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ldr x8, [sp] // 8-byte Reload
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB25_36: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB25_6
; VBITS_GE_512-NEXT: .LBB25_37: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB25_7
; VBITS_GE_512-NEXT: .LBB25_38: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB25_8
; VBITS_GE_512-NEXT: .LBB25_39: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB25_9
; VBITS_GE_512-NEXT: .LBB25_40: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB25_10
; VBITS_GE_512-NEXT: .LBB25_41: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB25_11
; VBITS_GE_512-NEXT: .LBB25_42: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB25_12
; VBITS_GE_512-NEXT: .LBB25_43: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB25_13
; VBITS_GE_512-NEXT: .LBB25_44: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB25_14
; VBITS_GE_512-NEXT: .LBB25_45: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB25_15
; VBITS_GE_512-NEXT: .LBB25_46: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB25_16
; VBITS_GE_512-NEXT: .LBB25_47: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB25_17
; VBITS_GE_512-NEXT: .LBB25_48: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB25_18
; VBITS_GE_512-NEXT: .LBB25_49: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB25_19
; VBITS_GE_512-NEXT: .LBB25_50: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB25_20
; VBITS_GE_512-NEXT: .LBB25_51: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB25_21
; VBITS_GE_512-NEXT: .LBB25_52: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB25_22
; VBITS_GE_512-NEXT: .LBB25_53: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB25_23
; VBITS_GE_512-NEXT: .LBB25_54: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB25_24
; VBITS_GE_512-NEXT: .LBB25_55: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB25_25
; VBITS_GE_512-NEXT: .LBB25_56: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB25_26
; VBITS_GE_512-NEXT: .LBB25_57: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB25_27
; VBITS_GE_512-NEXT: .LBB25_58: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB25_28
; VBITS_GE_512-NEXT: .LBB25_59: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB25_29
; VBITS_GE_512-NEXT: .LBB25_60: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB25_30
; VBITS_GE_512-NEXT: .LBB25_61: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB25_31
; VBITS_GE_512-NEXT: .LBB25_62: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB25_32
; VBITS_GE_512-NEXT: .LBB25_63: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB25_33
; VBITS_GE_512-NEXT: .LBB25_64: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB25_34
; VBITS_GE_512-NEXT: b .LBB25_35
;
; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i16_m16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1, x8, lsl #1]
; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z1.h, #0
; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.b, vl16
; CHECK-EXPAND-NEXT: uzp1 z3.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z2.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: splice z0.b, p1, { z2.b, z3.b }
; CHECK-EXPAND-NEXT: ptrue p1.b, vl32
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr %c
ret void
}
;
; Expanding load of <16 x i8> from %ap, sign-extended to <16 x i32> and stored
; to %c. The lane mask is (%b == 0), where %b is a <16 x i32> vector loaded
; from %bp. The passthru operand is poison, so disabled lanes carry no defined
; value. (The name presumably encodes "v16i8 extended to i32, mask built from
; 32-bit elements" - confirm against the file's naming scheme.)
;
; What the autogenerated assertions below show:
;  * Without sve2p2 the expanding load is scalarised. The mask is packed into a
;    GPR bitmask (w8) and every lane is tested with tbz/tbnz; an enabled lane
;    loads one byte with a post-incrementing lane load, so enabled lanes read
;    consecutive bytes starting at %ap.
;  * With a 256-bit minimum vector length the <16 x i32> compare and the final
;    store are split into two 8-lane halves; with 512 bits or more a single
;    16-lane load/store is used.
;  * With sve2p2 no branches are emitted: cntp counts the enabled lanes,
;    whilelo builds a predicate covering that many leading lanes, one
;    contiguous ld1b loads them, and the expand instruction distributes the
;    loaded bytes according to the mask predicate.
;
; NOTE(review): both non-sve2p2 sequences adjust sp by 16 bytes although none
; of the checked instructions accesses the stack - confirm whether this is a
; known codegen artefact.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define void @masked_load_sext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI26_0
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI26_0]
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w9, s0
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB26_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB26_3
; VBITS_GE_256-NEXT: b .LBB26_4
; VBITS_GE_256-NEXT: .LBB26_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB26_4
; VBITS_GE_256-NEXT: .LBB26_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB26_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB26_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB26_21
; VBITS_GE_256-NEXT: .LBB26_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB26_22
; VBITS_GE_256-NEXT: .LBB26_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB26_23
; VBITS_GE_256-NEXT: .LBB26_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB26_24
; VBITS_GE_256-NEXT: .LBB26_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB26_25
; VBITS_GE_256-NEXT: .LBB26_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB26_26
; VBITS_GE_256-NEXT: .LBB26_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB26_27
; VBITS_GE_256-NEXT: .LBB26_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB26_28
; VBITS_GE_256-NEXT: .LBB26_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB26_29
; VBITS_GE_256-NEXT: .LBB26_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB26_30
; VBITS_GE_256-NEXT: .LBB26_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB26_31
; VBITS_GE_256-NEXT: .LBB26_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB26_32
; VBITS_GE_256-NEXT: .LBB26_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB26_19
; VBITS_GE_256-NEXT: .LBB26_18: // %cond.load57
; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_256-NEXT: .LBB26_19: // %else58
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB26_20: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB26_6
; VBITS_GE_256-NEXT: .LBB26_21: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB26_7
; VBITS_GE_256-NEXT: .LBB26_22: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB26_8
; VBITS_GE_256-NEXT: .LBB26_23: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB26_9
; VBITS_GE_256-NEXT: .LBB26_24: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB26_10
; VBITS_GE_256-NEXT: .LBB26_25: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB26_11
; VBITS_GE_256-NEXT: .LBB26_26: // %cond.load29
; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB26_12
; VBITS_GE_256-NEXT: .LBB26_27: // %cond.load33
; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB26_13
; VBITS_GE_256-NEXT: .LBB26_28: // %cond.load37
; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB26_14
; VBITS_GE_256-NEXT: .LBB26_29: // %cond.load41
; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB26_15
; VBITS_GE_256-NEXT: .LBB26_30: // %cond.load45
; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB26_16
; VBITS_GE_256-NEXT: .LBB26_31: // %cond.load49
; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB26_17
; VBITS_GE_256-NEXT: .LBB26_32: // %cond.load53
; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB26_18
; VBITS_GE_256-NEXT: b .LBB26_19
;
; VBITS_GE_512-LABEL: masked_load_sext_v16i8i32_m32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB26_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB26_3
; VBITS_GE_512-NEXT: b .LBB26_4
; VBITS_GE_512-NEXT: .LBB26_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB26_4
; VBITS_GE_512-NEXT: .LBB26_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB26_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB26_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB26_21
; VBITS_GE_512-NEXT: .LBB26_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB26_22
; VBITS_GE_512-NEXT: .LBB26_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB26_23
; VBITS_GE_512-NEXT: .LBB26_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB26_24
; VBITS_GE_512-NEXT: .LBB26_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB26_25
; VBITS_GE_512-NEXT: .LBB26_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB26_26
; VBITS_GE_512-NEXT: .LBB26_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB26_27
; VBITS_GE_512-NEXT: .LBB26_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB26_28
; VBITS_GE_512-NEXT: .LBB26_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB26_29
; VBITS_GE_512-NEXT: .LBB26_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB26_30
; VBITS_GE_512-NEXT: .LBB26_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB26_31
; VBITS_GE_512-NEXT: .LBB26_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB26_32
; VBITS_GE_512-NEXT: .LBB26_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB26_19
; VBITS_GE_512-NEXT: .LBB26_18: // %cond.load57
; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_512-NEXT: .LBB26_19: // %else58
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB26_20: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB26_6
; VBITS_GE_512-NEXT: .LBB26_21: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB26_7
; VBITS_GE_512-NEXT: .LBB26_22: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB26_8
; VBITS_GE_512-NEXT: .LBB26_23: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB26_9
; VBITS_GE_512-NEXT: .LBB26_24: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB26_10
; VBITS_GE_512-NEXT: .LBB26_25: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB26_11
; VBITS_GE_512-NEXT: .LBB26_26: // %cond.load29
; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB26_12
; VBITS_GE_512-NEXT: .LBB26_27: // %cond.load33
; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB26_13
; VBITS_GE_512-NEXT: .LBB26_28: // %cond.load37
; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB26_14
; VBITS_GE_512-NEXT: .LBB26_29: // %cond.load41
; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB26_15
; VBITS_GE_512-NEXT: .LBB26_30: // %cond.load45
; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB26_16
; VBITS_GE_512-NEXT: .LBB26_31: // %cond.load49
; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB26_17
; VBITS_GE_512-NEXT: .LBB26_32: // %cond.load53
; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB26_18
; VBITS_GE_512-NEXT: b .LBB26_19
;
; CHECK-EXPAND-LABEL: masked_load_sext_v16i8i32_m32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.b, vl16
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0]
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z1.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: sunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
; Mask source: sixteen 32-bit values; a lane is enabled where the value is zero.
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
; Expanding load of bytes from %ap into the enabled lanes (poison passthru).
%load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison)
; Widen i8 -> i32 with sign extension and write all sixteen lanes to %c.
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
;
; Expanding load of <8 x i8> from %ap, sign-extended to <8 x i64> and stored to
; %c. The lane mask is (%b == 0), where %b is an <8 x i64> vector loaded from
; %bp. The passthru operand is poison, so disabled lanes carry no defined
; value. (The name presumably encodes "v8i8 extended to i64, mask built from
; 64-bit elements" - confirm against the file's naming scheme.)
;
; What the autogenerated assertions below show:
;  * Without sve2p2 the expanding load is scalarised. The eight mask bits are
;    assembled into w8 with umov/bfi/orr and every lane is tested with
;    tbz/tbnz; an enabled lane loads one byte with a post-incrementing lane
;    load, so enabled lanes read consecutive bytes starting at %ap.
;  * With a 256-bit minimum vector length the <8 x i64> compare and the final
;    store are split into two 4-lane halves (joined with splice for the mask);
;    with 512 bits or more a single 8-lane load/store is used.
;  * With sve2p2 no branches are emitted: cntp counts the enabled lanes,
;    whilelo builds a predicate covering that many leading lanes, one
;    contiguous ld1b loads them, and the expand instruction distributes the
;    loaded bytes according to the mask predicate.
;
; NOTE(review): both non-sve2p2 sequences adjust sp by 16 bytes although none
; of the checked instructions accesses the stack - confirm whether this is a
; known codegen artefact.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
define void @masked_load_sext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB27_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB27_3
; VBITS_GE_256-NEXT: b .LBB27_4
; VBITS_GE_256-NEXT: .LBB27_2:
; VBITS_GE_256-NEXT: // implicit-def: $d0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB27_4
; VBITS_GE_256-NEXT: .LBB27_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB27_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB27_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB27_13
; VBITS_GE_256-NEXT: .LBB27_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB27_14
; VBITS_GE_256-NEXT: .LBB27_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB27_15
; VBITS_GE_256-NEXT: .LBB27_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB27_16
; VBITS_GE_256-NEXT: .LBB27_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB27_11
; VBITS_GE_256-NEXT: .LBB27_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_256-NEXT: .LBB27_11: // %else26
; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB27_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB27_6
; VBITS_GE_256-NEXT: .LBB27_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB27_7
; VBITS_GE_256-NEXT: .LBB27_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB27_8
; VBITS_GE_256-NEXT: .LBB27_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB27_9
; VBITS_GE_256-NEXT: .LBB27_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB27_10
; VBITS_GE_256-NEXT: b .LBB27_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i8i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB27_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB27_3
; VBITS_GE_512-NEXT: b .LBB27_4
; VBITS_GE_512-NEXT: .LBB27_2:
; VBITS_GE_512-NEXT: // implicit-def: $d0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB27_4
; VBITS_GE_512-NEXT: .LBB27_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB27_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB27_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB27_13
; VBITS_GE_512-NEXT: .LBB27_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB27_14
; VBITS_GE_512-NEXT: .LBB27_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB27_15
; VBITS_GE_512-NEXT: .LBB27_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB27_16
; VBITS_GE_512-NEXT: .LBB27_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB27_11
; VBITS_GE_512-NEXT: .LBB27_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_512-NEXT: .LBB27_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB27_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB27_6
; VBITS_GE_512-NEXT: .LBB27_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB27_7
; VBITS_GE_512-NEXT: .LBB27_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB27_8
; VBITS_GE_512-NEXT: .LBB27_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB27_9
; VBITS_GE_512-NEXT: .LBB27_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB27_10
; VBITS_GE_512-NEXT: b .LBB27_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i8i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.b, vl8
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Mask source: eight 64-bit values; a lane is enabled where the value is zero.
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
; Expanding load of bytes from %ap into the enabled lanes (poison passthru).
%load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison)
; Widen i8 -> i64 with sign extension and write all eight lanes to %c.
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI28_0
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI28_0]
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w9, s0
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB28_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rh { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #2
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB28_3
; VBITS_GE_256-NEXT: b .LBB28_4
; VBITS_GE_256-NEXT: .LBB28_2:
; VBITS_GE_256-NEXT: ptrue p2.h, vl16
; VBITS_GE_256-NEXT: adrp x9, .LCPI28_1
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI28_1
; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB28_4
; VBITS_GE_256-NEXT: .LBB28_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: .LBB28_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB28_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB28_21
; VBITS_GE_256-NEXT: .LBB28_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB28_22
; VBITS_GE_256-NEXT: .LBB28_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB28_23
; VBITS_GE_256-NEXT: .LBB28_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB28_24
; VBITS_GE_256-NEXT: .LBB28_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB28_25
; VBITS_GE_256-NEXT: .LBB28_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB28_26
; VBITS_GE_256-NEXT: .LBB28_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB28_27
; VBITS_GE_256-NEXT: .LBB28_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB28_28
; VBITS_GE_256-NEXT: .LBB28_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB28_29
; VBITS_GE_256-NEXT: .LBB28_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB28_30
; VBITS_GE_256-NEXT: .LBB28_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB28_31
; VBITS_GE_256-NEXT: .LBB28_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB28_32
; VBITS_GE_256-NEXT: .LBB28_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB28_19
; VBITS_GE_256-NEXT: .LBB28_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w8
; VBITS_GE_256-NEXT: ldrh w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w8
; VBITS_GE_256-NEXT: .LBB28_19: // %else58
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB28_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB28_6
; VBITS_GE_256-NEXT: .LBB28_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB28_7
; VBITS_GE_256-NEXT: .LBB28_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB28_8
; VBITS_GE_256-NEXT: .LBB28_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB28_9
; VBITS_GE_256-NEXT: .LBB28_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB28_10
; VBITS_GE_256-NEXT: .LBB28_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB28_11
; VBITS_GE_256-NEXT: .LBB28_26: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB28_12
; VBITS_GE_256-NEXT: .LBB28_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB28_13
; VBITS_GE_256-NEXT: .LBB28_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB28_14
; VBITS_GE_256-NEXT: .LBB28_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB28_15
; VBITS_GE_256-NEXT: .LBB28_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB28_16
; VBITS_GE_256-NEXT: .LBB28_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB28_17
; VBITS_GE_256-NEXT: .LBB28_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB28_18
; VBITS_GE_256-NEXT: b .LBB28_19
;
; VBITS_GE_512-LABEL: masked_load_sext_v16i16i32_m32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB28_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #2
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB28_3
; VBITS_GE_512-NEXT: b .LBB28_4
; VBITS_GE_512-NEXT: .LBB28_2:
; VBITS_GE_512-NEXT: ptrue p2.h, vl16
; VBITS_GE_512-NEXT: adrp x9, .LCPI28_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI28_0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB28_4
; VBITS_GE_512-NEXT: .LBB28_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: .LBB28_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB28_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB28_21
; VBITS_GE_512-NEXT: .LBB28_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB28_22
; VBITS_GE_512-NEXT: .LBB28_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB28_23
; VBITS_GE_512-NEXT: .LBB28_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB28_24
; VBITS_GE_512-NEXT: .LBB28_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB28_25
; VBITS_GE_512-NEXT: .LBB28_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB28_26
; VBITS_GE_512-NEXT: .LBB28_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB28_27
; VBITS_GE_512-NEXT: .LBB28_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB28_28
; VBITS_GE_512-NEXT: .LBB28_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB28_29
; VBITS_GE_512-NEXT: .LBB28_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB28_30
; VBITS_GE_512-NEXT: .LBB28_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB28_31
; VBITS_GE_512-NEXT: .LBB28_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB28_32
; VBITS_GE_512-NEXT: .LBB28_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB28_19
; VBITS_GE_512-NEXT: .LBB28_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w8
; VBITS_GE_512-NEXT: ldrh w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8
; VBITS_GE_512-NEXT: .LBB28_19: // %else58
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB28_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB28_6
; VBITS_GE_512-NEXT: .LBB28_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB28_7
; VBITS_GE_512-NEXT: .LBB28_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB28_8
; VBITS_GE_512-NEXT: .LBB28_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB28_9
; VBITS_GE_512-NEXT: .LBB28_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB28_10
; VBITS_GE_512-NEXT: .LBB28_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB28_11
; VBITS_GE_512-NEXT: .LBB28_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB28_12
; VBITS_GE_512-NEXT: .LBB28_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB28_13
; VBITS_GE_512-NEXT: .LBB28_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB28_14
; VBITS_GE_512-NEXT: .LBB28_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB28_15
; VBITS_GE_512-NEXT: .LBB28_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB28_16
; VBITS_GE_512-NEXT: .LBB28_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB28_17
; VBITS_GE_512-NEXT: .LBB28_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB28_18
; VBITS_GE_512-NEXT: b .LBB28_19
;
; CHECK-EXPAND-LABEL: masked_load_sext_v16i16i32_m32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.h, vl16
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0]
; CHECK-EXPAND-NEXT: sunpklo z0.h, z1.b
; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h
; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
%load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison)
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i16> from %ap, sign-extended to <8 x i64> and stored
; to %c. The mask comes from comparing the <8 x i64> at %bp against zero, so
; the mask source elements (i64) are four times wider than the loaded
; elements (i16).
;
; In the assertions below, the two runs without sve2p2 expect a scalarized
; sequence: the mask is packed into the low 8 bits of w8 and each lane is
; loaded conditionally (tbz/tbnz on the lane's bit, ld1 into that lane,
; post-incrementing x0 by 2). The sve2p2 run expects cntp + whilelo to build a
; contiguous load predicate, one ld1h, and then the expand instruction.
define void @masked_load_sext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB29_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB29_3
; VBITS_GE_256-NEXT: b .LBB29_4
; VBITS_GE_256-NEXT: .LBB29_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB29_4
; VBITS_GE_256-NEXT: .LBB29_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_256-NEXT: .LBB29_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB29_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB29_13
; VBITS_GE_256-NEXT: .LBB29_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB29_14
; VBITS_GE_256-NEXT: .LBB29_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB29_15
; VBITS_GE_256-NEXT: .LBB29_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB29_16
; VBITS_GE_256-NEXT: .LBB29_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB29_11
; VBITS_GE_256-NEXT: .LBB29_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_256-NEXT: .LBB29_11: // %else26
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB29_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB29_6
; VBITS_GE_256-NEXT: .LBB29_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB29_7
; VBITS_GE_256-NEXT: .LBB29_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB29_8
; VBITS_GE_256-NEXT: .LBB29_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB29_9
; VBITS_GE_256-NEXT: .LBB29_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB29_10
; VBITS_GE_256-NEXT: b .LBB29_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i16i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB29_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB29_3
; VBITS_GE_512-NEXT: b .LBB29_4
; VBITS_GE_512-NEXT: .LBB29_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB29_4
; VBITS_GE_512-NEXT: .LBB29_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_512-NEXT: .LBB29_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB29_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB29_13
; VBITS_GE_512-NEXT: .LBB29_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB29_14
; VBITS_GE_512-NEXT: .LBB29_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB29_15
; VBITS_GE_512-NEXT: .LBB29_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB29_16
; VBITS_GE_512-NEXT: .LBB29_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB29_11
; VBITS_GE_512-NEXT: .LBB29_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_512-NEXT: .LBB29_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB29_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB29_6
; VBITS_GE_512-NEXT: .LBB29_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB29_7
; VBITS_GE_512-NEXT: .LBB29_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB29_8
; VBITS_GE_512-NEXT: .LBB29_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB29_9
; VBITS_GE_512-NEXT: .LBB29_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB29_10
; VBITS_GE_512-NEXT: b .LBB29_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i16i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.h, vl8
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h
; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: sunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: sunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Mask lane i is true when element i of the <8 x i64> at %bp equals zero.
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
; Despite the "masked_load" in the test name, the intrinsic exercised here is
; llvm.masked.expandload. The passthru operand is poison, so disabled lanes
; carry no defined value.
%load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison)
; Widen i16 -> i64 with sign extension before storing all 8 lanes to %c.
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB30_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB30_3
; VBITS_GE_256-NEXT: b .LBB30_4
; VBITS_GE_256-NEXT: .LBB30_2:
; VBITS_GE_256-NEXT: ptrue p2.s, vl8
; VBITS_GE_256-NEXT: adrp x9, .LCPI30_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI30_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB30_4
; VBITS_GE_256-NEXT: .LBB30_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: .LBB30_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB30_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB30_13
; VBITS_GE_256-NEXT: .LBB30_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB30_14
; VBITS_GE_256-NEXT: .LBB30_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB30_15
; VBITS_GE_256-NEXT: .LBB30_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB30_16
; VBITS_GE_256-NEXT: .LBB30_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB30_11
; VBITS_GE_256-NEXT: .LBB30_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w8
; VBITS_GE_256-NEXT: .LBB30_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB30_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB30_6
; VBITS_GE_256-NEXT: .LBB30_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB30_7
; VBITS_GE_256-NEXT: .LBB30_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB30_8
; VBITS_GE_256-NEXT: .LBB30_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB30_9
; VBITS_GE_256-NEXT: .LBB30_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB30_10
; VBITS_GE_256-NEXT: b .LBB30_11
;
; VBITS_GE_512-LABEL: masked_load_sext_v8i32i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB30_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB30_3
; VBITS_GE_512-NEXT: b .LBB30_4
; VBITS_GE_512-NEXT: .LBB30_2:
; VBITS_GE_512-NEXT: ptrue p2.s, vl8
; VBITS_GE_512-NEXT: adrp x9, .LCPI30_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI30_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB30_4
; VBITS_GE_512-NEXT: .LBB30_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: .LBB30_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB30_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB30_13
; VBITS_GE_512-NEXT: .LBB30_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB30_14
; VBITS_GE_512-NEXT: .LBB30_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB30_15
; VBITS_GE_512-NEXT: .LBB30_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB30_16
; VBITS_GE_512-NEXT: .LBB30_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB30_11
; VBITS_GE_512-NEXT: .LBB30_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8
; VBITS_GE_512-NEXT: .LBB30_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB30_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB30_6
; VBITS_GE_512-NEXT: .LBB30_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB30_7
; VBITS_GE_512-NEXT: .LBB30_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB30_8
; VBITS_GE_512-NEXT: .LBB30_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB30_9
; VBITS_GE_512-NEXT: .LBB30_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB30_10
; VBITS_GE_512-NEXT: b .LBB30_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_v8i32i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.s, vl8
; CHECK-EXPAND-NEXT: cmpne p2.s, p1/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s
; CHECK-EXPAND-NEXT: whilelo p1.s, xzr, x9
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v32i8i16_m16(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16_m16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0
; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z2.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b
; VBITS_GE_256-NEXT: umov w8, v1.b[0]
; VBITS_GE_256-NEXT: umov w11, v0.b[3]
; VBITS_GE_256-NEXT: umov w12, v0.b[4]
; VBITS_GE_256-NEXT: umov w13, v1.b[1]
; VBITS_GE_256-NEXT: umov w9, v1.b[7]
; VBITS_GE_256-NEXT: umov w10, v1.b[8]
; VBITS_GE_256-NEXT: umov w16, v1.b[9]
; VBITS_GE_256-NEXT: umov w17, v1.b[10]
; VBITS_GE_256-NEXT: umov w18, v0.b[5]
; VBITS_GE_256-NEXT: umov w14, v1.b[2]
; VBITS_GE_256-NEXT: umov w15, v1.b[3]
; VBITS_GE_256-NEXT: umov w1, v1.b[4]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: ubfiz w11, w11, #19, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #20, #1
; VBITS_GE_256-NEXT: bfi w8, w13, #1, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[6]
; VBITS_GE_256-NEXT: ubfiz w9, w9, #7, #1
; VBITS_GE_256-NEXT: ubfiz w10, w10, #8, #1
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v1.b[11]
; VBITS_GE_256-NEXT: ubfiz w16, w16, #9, #1
; VBITS_GE_256-NEXT: ubfiz w17, w17, #10, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #21, #1
; VBITS_GE_256-NEXT: orr w9, w9, w10
; VBITS_GE_256-NEXT: bfi w8, w14, #2, #1
; VBITS_GE_256-NEXT: umov w14, v0.b[7]
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[12]
; VBITS_GE_256-NEXT: ubfiz w13, w13, #22, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[8]
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v1.b[13]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #11, #1
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: umov w13, v1.b[14]
; VBITS_GE_256-NEXT: bfi w8, w15, #3, #1
; VBITS_GE_256-NEXT: umov w15, v0.b[9]
; VBITS_GE_256-NEXT: orr w9, w9, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[10]
; VBITS_GE_256-NEXT: ubfiz w14, w14, #23, #1
; VBITS_GE_256-NEXT: ubfiz w16, w16, #12, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #24, #1
; VBITS_GE_256-NEXT: umov w10, v1.b[5]
; VBITS_GE_256-NEXT: ubfiz w17, w17, #13, #1
; VBITS_GE_256-NEXT: orr w11, w11, w14
; VBITS_GE_256-NEXT: bfi w8, w1, #4, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: umov w16, v1.b[15]
; VBITS_GE_256-NEXT: ubfiz w15, w15, #25, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #14, #1
; VBITS_GE_256-NEXT: orr w11, w11, w18
; VBITS_GE_256-NEXT: umov w18, v0.b[0]
; VBITS_GE_256-NEXT: umov w1, v0.b[11]
; VBITS_GE_256-NEXT: ubfiz w12, w12, #26, #1
; VBITS_GE_256-NEXT: orr w9, w9, w17
; VBITS_GE_256-NEXT: umov w17, v0.b[1]
; VBITS_GE_256-NEXT: orr w11, w11, w15
; VBITS_GE_256-NEXT: orr w9, w9, w13
; VBITS_GE_256-NEXT: umov w13, v0.b[12]
; VBITS_GE_256-NEXT: umov w14, v1.b[6]
; VBITS_GE_256-NEXT: umov w15, v0.b[2]
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: umov w12, v0.b[13]
; VBITS_GE_256-NEXT: ubfiz w16, w16, #15, #1
; VBITS_GE_256-NEXT: bfi w8, w10, #5, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[14]
; VBITS_GE_256-NEXT: ubfiz w1, w1, #27, #1
; VBITS_GE_256-NEXT: ubfiz w18, w18, #16, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: ubfiz w16, w17, #17, #1
; VBITS_GE_256-NEXT: ubfiz w13, w13, #28, #1
; VBITS_GE_256-NEXT: orr w11, w11, w1
; VBITS_GE_256-NEXT: bfi w8, w14, #6, #1
; VBITS_GE_256-NEXT: orr w9, w9, w18
; VBITS_GE_256-NEXT: ubfiz w14, w15, #18, #1
; VBITS_GE_256-NEXT: ubfiz w12, w12, #29, #1
; VBITS_GE_256-NEXT: orr w9, w9, w16
; VBITS_GE_256-NEXT: orr w11, w11, w13
; VBITS_GE_256-NEXT: ubfiz w10, w10, #30, #1
; VBITS_GE_256-NEXT: umov w13, v0.b[15]
; VBITS_GE_256-NEXT: orr w9, w9, w14
; VBITS_GE_256-NEXT: orr w11, w11, w12
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w9, w11, w10
; VBITS_GE_256-NEXT: orr w8, w8, w9
; VBITS_GE_256-NEXT: orr w8, w8, w13, lsl #31
; VBITS_GE_256-NEXT: tbz w8, #0, .LBB31_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rb { z0.b }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #1
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB31_3
; VBITS_GE_256-NEXT: b .LBB31_4
; VBITS_GE_256-NEXT: .LBB31_2:
; VBITS_GE_256-NEXT: ptrue p2.b, vl32
; VBITS_GE_256-NEXT: adrp x9, .LCPI31_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI31_0
; VBITS_GE_256-NEXT: ld1b { z0.b }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB31_4
; VBITS_GE_256-NEXT: .LBB31_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: .LBB31_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB31_36
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB31_37
; VBITS_GE_256-NEXT: .LBB31_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB31_38
; VBITS_GE_256-NEXT: .LBB31_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB31_39
; VBITS_GE_256-NEXT: .LBB31_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB31_40
; VBITS_GE_256-NEXT: .LBB31_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB31_41
; VBITS_GE_256-NEXT: .LBB31_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB31_42
; VBITS_GE_256-NEXT: .LBB31_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB31_43
; VBITS_GE_256-NEXT: .LBB31_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB31_44
; VBITS_GE_256-NEXT: .LBB31_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB31_45
; VBITS_GE_256-NEXT: .LBB31_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB31_46
; VBITS_GE_256-NEXT: .LBB31_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB31_47
; VBITS_GE_256-NEXT: .LBB31_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB31_48
; VBITS_GE_256-NEXT: .LBB31_17: // %else54
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB31_49
; VBITS_GE_256-NEXT: .LBB31_18: // %else58
; VBITS_GE_256-NEXT: tbnz w8, #16, .LBB31_50
; VBITS_GE_256-NEXT: .LBB31_19: // %else62
; VBITS_GE_256-NEXT: tbnz w8, #17, .LBB31_51
; VBITS_GE_256-NEXT: .LBB31_20: // %else66
; VBITS_GE_256-NEXT: tbnz w8, #18, .LBB31_52
; VBITS_GE_256-NEXT: .LBB31_21: // %else70
; VBITS_GE_256-NEXT: tbnz w8, #19, .LBB31_53
; VBITS_GE_256-NEXT: .LBB31_22: // %else74
; VBITS_GE_256-NEXT: tbnz w8, #20, .LBB31_54
; VBITS_GE_256-NEXT: .LBB31_23: // %else78
; VBITS_GE_256-NEXT: tbnz w8, #21, .LBB31_55
; VBITS_GE_256-NEXT: .LBB31_24: // %else82
; VBITS_GE_256-NEXT: tbnz w8, #22, .LBB31_56
; VBITS_GE_256-NEXT: .LBB31_25: // %else86
; VBITS_GE_256-NEXT: tbnz w8, #23, .LBB31_57
; VBITS_GE_256-NEXT: .LBB31_26: // %else90
; VBITS_GE_256-NEXT: tbnz w8, #24, .LBB31_58
; VBITS_GE_256-NEXT: .LBB31_27: // %else94
; VBITS_GE_256-NEXT: tbnz w8, #25, .LBB31_59
; VBITS_GE_256-NEXT: .LBB31_28: // %else98
; VBITS_GE_256-NEXT: tbnz w8, #26, .LBB31_60
; VBITS_GE_256-NEXT: .LBB31_29: // %else102
; VBITS_GE_256-NEXT: tbnz w8, #27, .LBB31_61
; VBITS_GE_256-NEXT: .LBB31_30: // %else106
; VBITS_GE_256-NEXT: tbnz w8, #28, .LBB31_62
; VBITS_GE_256-NEXT: .LBB31_31: // %else110
; VBITS_GE_256-NEXT: tbnz w8, #29, .LBB31_63
; VBITS_GE_256-NEXT: .LBB31_32: // %else114
; VBITS_GE_256-NEXT: tbnz w8, #30, .LBB31_64
; VBITS_GE_256-NEXT: .LBB31_33: // %else118
; VBITS_GE_256-NEXT: tbz w8, #31, .LBB31_35
; VBITS_GE_256-NEXT: .LBB31_34: // %cond.load121
; VBITS_GE_256-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w8
; VBITS_GE_256-NEXT: ldrb w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w8
; VBITS_GE_256-NEXT: .LBB31_35: // %else122
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x2]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB31_36: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB31_6
; VBITS_GE_256-NEXT: .LBB31_37: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB31_7
; VBITS_GE_256-NEXT: .LBB31_38: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB31_8
; VBITS_GE_256-NEXT: .LBB31_39: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB31_9
; VBITS_GE_256-NEXT: .LBB31_40: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB31_10
; VBITS_GE_256-NEXT: .LBB31_41: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB31_11
; VBITS_GE_256-NEXT: .LBB31_42: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB31_12
; VBITS_GE_256-NEXT: .LBB31_43: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB31_13
; VBITS_GE_256-NEXT: .LBB31_44: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB31_14
; VBITS_GE_256-NEXT: .LBB31_45: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB31_15
; VBITS_GE_256-NEXT: .LBB31_46: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB31_16
; VBITS_GE_256-NEXT: .LBB31_47: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB31_17
; VBITS_GE_256-NEXT: .LBB31_48: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB31_18
; VBITS_GE_256-NEXT: .LBB31_49: // %cond.load57
; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #16, .LBB31_19
; VBITS_GE_256-NEXT: .LBB31_50: // %cond.load61
; VBITS_GE_256-NEXT: mov w9, #16 // =0x10
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #17, .LBB31_20
; VBITS_GE_256-NEXT: .LBB31_51: // %cond.load65
; VBITS_GE_256-NEXT: mov w9, #17 // =0x11
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #18, .LBB31_21
; VBITS_GE_256-NEXT: .LBB31_52: // %cond.load69
; VBITS_GE_256-NEXT: mov w9, #18 // =0x12
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #19, .LBB31_22
; VBITS_GE_256-NEXT: .LBB31_53: // %cond.load73
; VBITS_GE_256-NEXT: mov w9, #19 // =0x13
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #20, .LBB31_23
; VBITS_GE_256-NEXT: .LBB31_54: // %cond.load77
; VBITS_GE_256-NEXT: mov w9, #20 // =0x14
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #21, .LBB31_24
; VBITS_GE_256-NEXT: .LBB31_55: // %cond.load81
; VBITS_GE_256-NEXT: mov w9, #21 // =0x15
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #22, .LBB31_25
; VBITS_GE_256-NEXT: .LBB31_56: // %cond.load85
; VBITS_GE_256-NEXT: mov w9, #22 // =0x16
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #23, .LBB31_26
; VBITS_GE_256-NEXT: .LBB31_57: // %cond.load89
; VBITS_GE_256-NEXT: mov w9, #23 // =0x17
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #24, .LBB31_27
; VBITS_GE_256-NEXT: .LBB31_58: // %cond.load93
; VBITS_GE_256-NEXT: mov w9, #24 // =0x18
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #25, .LBB31_28
; VBITS_GE_256-NEXT: .LBB31_59: // %cond.load97
; VBITS_GE_256-NEXT: mov w9, #25 // =0x19
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #26, .LBB31_29
; VBITS_GE_256-NEXT: .LBB31_60: // %cond.load101
; VBITS_GE_256-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #27, .LBB31_30
; VBITS_GE_256-NEXT: .LBB31_61: // %cond.load105
; VBITS_GE_256-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #28, .LBB31_31
; VBITS_GE_256-NEXT: .LBB31_62: // %cond.load109
; VBITS_GE_256-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #29, .LBB31_32
; VBITS_GE_256-NEXT: .LBB31_63: // %cond.load113
; VBITS_GE_256-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #30, .LBB31_33
; VBITS_GE_256-NEXT: .LBB31_64: // %cond.load117
; VBITS_GE_256-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_256-NEXT: index z1.b, #0, #1
; VBITS_GE_256-NEXT: mov z2.b, w9
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_256-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #31, .LBB31_34
; VBITS_GE_256-NEXT: b .LBB31_35
;
; VBITS_GE_512-LABEL: masked_load_zext_v32i8i16_m16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #112
; VBITS_GE_512-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 112
; VBITS_GE_512-NEXT: .cfi_offset w19, -8
; VBITS_GE_512-NEXT: .cfi_offset w20, -16
; VBITS_GE_512-NEXT: .cfi_offset w21, -24
; VBITS_GE_512-NEXT: .cfi_offset w22, -32
; VBITS_GE_512-NEXT: .cfi_offset w23, -40
; VBITS_GE_512-NEXT: .cfi_offset w24, -48
; VBITS_GE_512-NEXT: .cfi_offset w25, -56
; VBITS_GE_512-NEXT: .cfi_offset w26, -64
; VBITS_GE_512-NEXT: .cfi_offset w27, -72
; VBITS_GE_512-NEXT: .cfi_offset w28, -80
; VBITS_GE_512-NEXT: .cfi_offset w30, -88
; VBITS_GE_512-NEXT: .cfi_offset w29, -96
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: str x2, [sp] // 8-byte Spill
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.b
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w12, v0.b[1]
; VBITS_GE_512-NEXT: fmov w6, s0
; VBITS_GE_512-NEXT: umov w3, v0.b[7]
; VBITS_GE_512-NEXT: umov w5, v0.b[8]
; VBITS_GE_512-NEXT: mov z5.b, z0.b[18]
; VBITS_GE_512-NEXT: mov z6.b, z0.b[19]
; VBITS_GE_512-NEXT: umov w13, v0.b[2]
; VBITS_GE_512-NEXT: umov w4, v0.b[9]
; VBITS_GE_512-NEXT: mov z7.b, z0.b[20]
; VBITS_GE_512-NEXT: umov w1, v0.b[10]
; VBITS_GE_512-NEXT: and w6, w6, #0x1
; VBITS_GE_512-NEXT: mov z16.b, z0.b[21]
; VBITS_GE_512-NEXT: fmov w20, s5
; VBITS_GE_512-NEXT: fmov w21, s6
; VBITS_GE_512-NEXT: bfi w6, w12, #1, #1
; VBITS_GE_512-NEXT: umov w11, v0.b[3]
; VBITS_GE_512-NEXT: umov w16, v0.b[11]
; VBITS_GE_512-NEXT: mov z17.b, z0.b[22]
; VBITS_GE_512-NEXT: fmov w22, s7
; VBITS_GE_512-NEXT: ubfiz w12, w3, #7, #1
; VBITS_GE_512-NEXT: ubfiz w3, w5, #8, #1
; VBITS_GE_512-NEXT: umov w17, v0.b[12]
; VBITS_GE_512-NEXT: mov z18.b, z0.b[23]
; VBITS_GE_512-NEXT: bfi w6, w13, #2, #1
; VBITS_GE_512-NEXT: ubfiz w13, w4, #9, #1
; VBITS_GE_512-NEXT: umov w18, v0.b[13]
; VBITS_GE_512-NEXT: mov z19.b, z0.b[24]
; VBITS_GE_512-NEXT: fmov w23, s16
; VBITS_GE_512-NEXT: ubfiz w5, w20, #18, #1
; VBITS_GE_512-NEXT: ubfiz w20, w21, #19, #1
; VBITS_GE_512-NEXT: orr w12, w12, w3
; VBITS_GE_512-NEXT: ubfiz w1, w1, #10, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: mov z20.b, z0.b[25]
; VBITS_GE_512-NEXT: fmov w24, s17
; VBITS_GE_512-NEXT: ubfiz w4, w22, #20, #1
; VBITS_GE_512-NEXT: orr w12, w12, w13
; VBITS_GE_512-NEXT: mov z21.b, z0.b[26]
; VBITS_GE_512-NEXT: fmov w25, s18
; VBITS_GE_512-NEXT: orr w3, w5, w20
; VBITS_GE_512-NEXT: bfi w6, w11, #3, #1
; VBITS_GE_512-NEXT: orr w11, w12, w1
; VBITS_GE_512-NEXT: ubfiz w12, w16, #11, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: mov z22.b, z0.b[27]
; VBITS_GE_512-NEXT: fmov w26, s19
; VBITS_GE_512-NEXT: orr w13, w3, w4
; VBITS_GE_512-NEXT: ubfiz w3, w23, #21, #1
; VBITS_GE_512-NEXT: ubfiz w16, w17, #12, #1
; VBITS_GE_512-NEXT: fmov w27, s20
; VBITS_GE_512-NEXT: ubfiz w17, w24, #22, #1
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: ubfiz w12, w18, #13, #1
; VBITS_GE_512-NEXT: fmov w28, s21
; VBITS_GE_512-NEXT: orr w13, w13, w3
; VBITS_GE_512-NEXT: ubfiz w18, w25, #23, #1
; VBITS_GE_512-NEXT: bfi w6, w10, #4, #1
; VBITS_GE_512-NEXT: orr w10, w11, w16
; VBITS_GE_512-NEXT: umov w15, v0.b[15]
; VBITS_GE_512-NEXT: mov z3.b, z0.b[16]
; VBITS_GE_512-NEXT: mov z23.b, z0.b[28]
; VBITS_GE_512-NEXT: fmov w29, s22
; VBITS_GE_512-NEXT: orr w11, w13, w17
; VBITS_GE_512-NEXT: orr w10, w10, w12
; VBITS_GE_512-NEXT: ubfiz w12, w26, #24, #1
; VBITS_GE_512-NEXT: mov z4.b, z0.b[17]
; VBITS_GE_512-NEXT: mov z24.b, z0.b[29]
; VBITS_GE_512-NEXT: orr w11, w11, w18
; VBITS_GE_512-NEXT: bfi w6, w9, #5, #1
; VBITS_GE_512-NEXT: ubfiz w9, w14, #14, #1
; VBITS_GE_512-NEXT: ubfiz w13, w27, #25, #1
; VBITS_GE_512-NEXT: mov z2.b, z0.b[30]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: ubfiz w14, w28, #26, #1
; VBITS_GE_512-NEXT: fmov w7, s3
; VBITS_GE_512-NEXT: fmov w30, s23
; VBITS_GE_512-NEXT: orr w9, w10, w9
; VBITS_GE_512-NEXT: orr w10, w11, w13
; VBITS_GE_512-NEXT: ubfiz w11, w29, #27, #1
; VBITS_GE_512-NEXT: umov w2, v0.b[6]
; VBITS_GE_512-NEXT: fmov w19, s4
; VBITS_GE_512-NEXT: fmov w8, s24
; VBITS_GE_512-NEXT: ubfiz w12, w15, #15, #1
; VBITS_GE_512-NEXT: orr w10, w10, w14
; VBITS_GE_512-NEXT: ubfiz w14, w30, #28, #1
; VBITS_GE_512-NEXT: mov z1.b, z0.b[31]
; VBITS_GE_512-NEXT: orr w10, w10, w11
; VBITS_GE_512-NEXT: fmov w11, s2
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: ubfiz w12, w7, #16, #1
; VBITS_GE_512-NEXT: ubfiz w13, w19, #17, #1
; VBITS_GE_512-NEXT: ubfiz w8, w8, #29, #1
; VBITS_GE_512-NEXT: bfi w6, w2, #6, #1
; VBITS_GE_512-NEXT: orr w10, w10, w14
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: ubfiz w11, w11, #30, #1
; VBITS_GE_512-NEXT: orr w8, w10, w8
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: orr w9, w6, w9
; VBITS_GE_512-NEXT: orr w8, w8, w11
; VBITS_GE_512-NEXT: orr w8, w9, w8
; VBITS_GE_512-NEXT: fmov w9, s1
; VBITS_GE_512-NEXT: orr w8, w8, w9, lsl #31
; VBITS_GE_512-NEXT: tbz w8, #0, .LBB31_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rb { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #1
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB31_3
; VBITS_GE_512-NEXT: b .LBB31_4
; VBITS_GE_512-NEXT: .LBB31_2:
; VBITS_GE_512-NEXT: ptrue p2.b, vl32
; VBITS_GE_512-NEXT: adrp x9, .LCPI31_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI31_0
; VBITS_GE_512-NEXT: ld1b { z0.b }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB31_4
; VBITS_GE_512-NEXT: .LBB31_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: .LBB31_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB31_36
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB31_37
; VBITS_GE_512-NEXT: .LBB31_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB31_38
; VBITS_GE_512-NEXT: .LBB31_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB31_39
; VBITS_GE_512-NEXT: .LBB31_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB31_40
; VBITS_GE_512-NEXT: .LBB31_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB31_41
; VBITS_GE_512-NEXT: .LBB31_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB31_42
; VBITS_GE_512-NEXT: .LBB31_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB31_43
; VBITS_GE_512-NEXT: .LBB31_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB31_44
; VBITS_GE_512-NEXT: .LBB31_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB31_45
; VBITS_GE_512-NEXT: .LBB31_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB31_46
; VBITS_GE_512-NEXT: .LBB31_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB31_47
; VBITS_GE_512-NEXT: .LBB31_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB31_48
; VBITS_GE_512-NEXT: .LBB31_17: // %else54
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB31_49
; VBITS_GE_512-NEXT: .LBB31_18: // %else58
; VBITS_GE_512-NEXT: tbnz w8, #16, .LBB31_50
; VBITS_GE_512-NEXT: .LBB31_19: // %else62
; VBITS_GE_512-NEXT: tbnz w8, #17, .LBB31_51
; VBITS_GE_512-NEXT: .LBB31_20: // %else66
; VBITS_GE_512-NEXT: tbnz w8, #18, .LBB31_52
; VBITS_GE_512-NEXT: .LBB31_21: // %else70
; VBITS_GE_512-NEXT: tbnz w8, #19, .LBB31_53
; VBITS_GE_512-NEXT: .LBB31_22: // %else74
; VBITS_GE_512-NEXT: tbnz w8, #20, .LBB31_54
; VBITS_GE_512-NEXT: .LBB31_23: // %else78
; VBITS_GE_512-NEXT: tbnz w8, #21, .LBB31_55
; VBITS_GE_512-NEXT: .LBB31_24: // %else82
; VBITS_GE_512-NEXT: tbnz w8, #22, .LBB31_56
; VBITS_GE_512-NEXT: .LBB31_25: // %else86
; VBITS_GE_512-NEXT: tbnz w8, #23, .LBB31_57
; VBITS_GE_512-NEXT: .LBB31_26: // %else90
; VBITS_GE_512-NEXT: tbnz w8, #24, .LBB31_58
; VBITS_GE_512-NEXT: .LBB31_27: // %else94
; VBITS_GE_512-NEXT: tbnz w8, #25, .LBB31_59
; VBITS_GE_512-NEXT: .LBB31_28: // %else98
; VBITS_GE_512-NEXT: tbnz w8, #26, .LBB31_60
; VBITS_GE_512-NEXT: .LBB31_29: // %else102
; VBITS_GE_512-NEXT: tbnz w8, #27, .LBB31_61
; VBITS_GE_512-NEXT: .LBB31_30: // %else106
; VBITS_GE_512-NEXT: tbnz w8, #28, .LBB31_62
; VBITS_GE_512-NEXT: .LBB31_31: // %else110
; VBITS_GE_512-NEXT: tbnz w8, #29, .LBB31_63
; VBITS_GE_512-NEXT: .LBB31_32: // %else114
; VBITS_GE_512-NEXT: tbnz w8, #30, .LBB31_64
; VBITS_GE_512-NEXT: .LBB31_33: // %else118
; VBITS_GE_512-NEXT: tbz w8, #31, .LBB31_35
; VBITS_GE_512-NEXT: .LBB31_34: // %cond.load121
; VBITS_GE_512-NEXT: mov w8, #31 // =0x1f
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w8
; VBITS_GE_512-NEXT: ldrb w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w8
; VBITS_GE_512-NEXT: .LBB31_35: // %else122
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: ldr x8, [sp] // 8-byte Reload
; VBITS_GE_512-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; VBITS_GE_512-NEXT: add sp, sp, #112
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB31_36: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB31_6
; VBITS_GE_512-NEXT: .LBB31_37: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB31_7
; VBITS_GE_512-NEXT: .LBB31_38: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB31_8
; VBITS_GE_512-NEXT: .LBB31_39: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB31_9
; VBITS_GE_512-NEXT: .LBB31_40: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB31_10
; VBITS_GE_512-NEXT: .LBB31_41: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB31_11
; VBITS_GE_512-NEXT: .LBB31_42: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB31_12
; VBITS_GE_512-NEXT: .LBB31_43: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB31_13
; VBITS_GE_512-NEXT: .LBB31_44: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB31_14
; VBITS_GE_512-NEXT: .LBB31_45: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB31_15
; VBITS_GE_512-NEXT: .LBB31_46: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB31_16
; VBITS_GE_512-NEXT: .LBB31_47: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB31_17
; VBITS_GE_512-NEXT: .LBB31_48: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB31_18
; VBITS_GE_512-NEXT: .LBB31_49: // %cond.load57
; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #16, .LBB31_19
; VBITS_GE_512-NEXT: .LBB31_50: // %cond.load61
; VBITS_GE_512-NEXT: mov w9, #16 // =0x10
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #17, .LBB31_20
; VBITS_GE_512-NEXT: .LBB31_51: // %cond.load65
; VBITS_GE_512-NEXT: mov w9, #17 // =0x11
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #18, .LBB31_21
; VBITS_GE_512-NEXT: .LBB31_52: // %cond.load69
; VBITS_GE_512-NEXT: mov w9, #18 // =0x12
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #19, .LBB31_22
; VBITS_GE_512-NEXT: .LBB31_53: // %cond.load73
; VBITS_GE_512-NEXT: mov w9, #19 // =0x13
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #20, .LBB31_23
; VBITS_GE_512-NEXT: .LBB31_54: // %cond.load77
; VBITS_GE_512-NEXT: mov w9, #20 // =0x14
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #21, .LBB31_24
; VBITS_GE_512-NEXT: .LBB31_55: // %cond.load81
; VBITS_GE_512-NEXT: mov w9, #21 // =0x15
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #22, .LBB31_25
; VBITS_GE_512-NEXT: .LBB31_56: // %cond.load85
; VBITS_GE_512-NEXT: mov w9, #22 // =0x16
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #23, .LBB31_26
; VBITS_GE_512-NEXT: .LBB31_57: // %cond.load89
; VBITS_GE_512-NEXT: mov w9, #23 // =0x17
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #24, .LBB31_27
; VBITS_GE_512-NEXT: .LBB31_58: // %cond.load93
; VBITS_GE_512-NEXT: mov w9, #24 // =0x18
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #25, .LBB31_28
; VBITS_GE_512-NEXT: .LBB31_59: // %cond.load97
; VBITS_GE_512-NEXT: mov w9, #25 // =0x19
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #26, .LBB31_29
; VBITS_GE_512-NEXT: .LBB31_60: // %cond.load101
; VBITS_GE_512-NEXT: mov w9, #26 // =0x1a
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #27, .LBB31_30
; VBITS_GE_512-NEXT: .LBB31_61: // %cond.load105
; VBITS_GE_512-NEXT: mov w9, #27 // =0x1b
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #28, .LBB31_31
; VBITS_GE_512-NEXT: .LBB31_62: // %cond.load109
; VBITS_GE_512-NEXT: mov w9, #28 // =0x1c
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #29, .LBB31_32
; VBITS_GE_512-NEXT: .LBB31_63: // %cond.load113
; VBITS_GE_512-NEXT: mov w9, #29 // =0x1d
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #30, .LBB31_33
; VBITS_GE_512-NEXT: .LBB31_64: // %cond.load117
; VBITS_GE_512-NEXT: mov w9, #30 // =0x1e
; VBITS_GE_512-NEXT: index z1.b, #0, #1
; VBITS_GE_512-NEXT: mov z2.b, w9
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: cmpeq p2.b, p1/z, z1.b, z2.b
; VBITS_GE_512-NEXT: mov z0.b, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #31, .LBB31_34
; VBITS_GE_512-NEXT: b .LBB31_35
;
; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i16_m16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl16
; CHECK-EXPAND-NEXT: mov x8, #16 // =0x10
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p0/z, [x1, x8, lsl #1]
; CHECK-EXPAND-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cmpeq p2.h, p0/z, z1.h, #0
; CHECK-EXPAND-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.b, vl16
; CHECK-EXPAND-NEXT: uzp1 z3.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z2.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: splice z0.b, p1, { z2.b, z3.b }
; CHECK-EXPAND-NEXT: ptrue p1.b, vl32
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr %c
ret void
}
; Expanding load of <16 x i8> from %ap, zero-extended to <16 x i32> and stored
; to %c. The mask is the lane-wise result of comparing the <16 x i32> value
; loaded from %bp against zero, and the passthru operand is poison.
;
; Expected code, as recorded by the autogenerated assertions below:
;  * runs without sve2p2: the mask is reduced to a scalar bitmask in w8, each
;    bit is tested with tbz/tbnz, and every selected byte is loaded by its own
;    instruction. With a 256-bit minimum vector length the i32 data is handled
;    as two halves (two ld1w / two st1w); with 512 bits and up a single
;    ld1w / st1w is used.
;  * run with sve2p2: no branches; a cntp/whilelo/ld1b/expand sequence is used.
define void @masked_load_zext_v16i8i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI32_0
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w9, s0
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB32_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB32_3
; VBITS_GE_256-NEXT: b .LBB32_4
; VBITS_GE_256-NEXT: .LBB32_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB32_4
; VBITS_GE_256-NEXT: .LBB32_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB32_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB32_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB32_21
; VBITS_GE_256-NEXT: .LBB32_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB32_22
; VBITS_GE_256-NEXT: .LBB32_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB32_23
; VBITS_GE_256-NEXT: .LBB32_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB32_24
; VBITS_GE_256-NEXT: .LBB32_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB32_25
; VBITS_GE_256-NEXT: .LBB32_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB32_26
; VBITS_GE_256-NEXT: .LBB32_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB32_27
; VBITS_GE_256-NEXT: .LBB32_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB32_28
; VBITS_GE_256-NEXT: .LBB32_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB32_29
; VBITS_GE_256-NEXT: .LBB32_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB32_30
; VBITS_GE_256-NEXT: .LBB32_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB32_31
; VBITS_GE_256-NEXT: .LBB32_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB32_32
; VBITS_GE_256-NEXT: .LBB32_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB32_19
; VBITS_GE_256-NEXT: .LBB32_18: // %cond.load57
; VBITS_GE_256-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_256-NEXT: .LBB32_19: // %else58
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB32_20: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB32_6
; VBITS_GE_256-NEXT: .LBB32_21: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB32_7
; VBITS_GE_256-NEXT: .LBB32_22: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB32_8
; VBITS_GE_256-NEXT: .LBB32_23: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB32_9
; VBITS_GE_256-NEXT: .LBB32_24: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB32_10
; VBITS_GE_256-NEXT: .LBB32_25: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB32_11
; VBITS_GE_256-NEXT: .LBB32_26: // %cond.load29
; VBITS_GE_256-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB32_12
; VBITS_GE_256-NEXT: .LBB32_27: // %cond.load33
; VBITS_GE_256-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB32_13
; VBITS_GE_256-NEXT: .LBB32_28: // %cond.load37
; VBITS_GE_256-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB32_14
; VBITS_GE_256-NEXT: .LBB32_29: // %cond.load41
; VBITS_GE_256-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB32_15
; VBITS_GE_256-NEXT: .LBB32_30: // %cond.load45
; VBITS_GE_256-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB32_16
; VBITS_GE_256-NEXT: .LBB32_31: // %cond.load49
; VBITS_GE_256-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB32_17
; VBITS_GE_256-NEXT: .LBB32_32: // %cond.load53
; VBITS_GE_256-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB32_18
; VBITS_GE_256-NEXT: b .LBB32_19
;
; VBITS_GE_512-LABEL: masked_load_zext_v16i8i32_m32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB32_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB32_3
; VBITS_GE_512-NEXT: b .LBB32_4
; VBITS_GE_512-NEXT: .LBB32_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB32_4
; VBITS_GE_512-NEXT: .LBB32_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB32_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB32_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB32_21
; VBITS_GE_512-NEXT: .LBB32_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB32_22
; VBITS_GE_512-NEXT: .LBB32_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB32_23
; VBITS_GE_512-NEXT: .LBB32_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB32_24
; VBITS_GE_512-NEXT: .LBB32_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB32_25
; VBITS_GE_512-NEXT: .LBB32_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB32_26
; VBITS_GE_512-NEXT: .LBB32_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB32_27
; VBITS_GE_512-NEXT: .LBB32_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB32_28
; VBITS_GE_512-NEXT: .LBB32_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB32_29
; VBITS_GE_512-NEXT: .LBB32_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB32_30
; VBITS_GE_512-NEXT: .LBB32_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB32_31
; VBITS_GE_512-NEXT: .LBB32_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB32_32
; VBITS_GE_512-NEXT: .LBB32_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB32_19
; VBITS_GE_512-NEXT: .LBB32_18: // %cond.load57
; VBITS_GE_512-NEXT: ld1 { v0.b }[15], [x0]
; VBITS_GE_512-NEXT: .LBB32_19: // %else58
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB32_20: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB32_6
; VBITS_GE_512-NEXT: .LBB32_21: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB32_7
; VBITS_GE_512-NEXT: .LBB32_22: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB32_8
; VBITS_GE_512-NEXT: .LBB32_23: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB32_9
; VBITS_GE_512-NEXT: .LBB32_24: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB32_10
; VBITS_GE_512-NEXT: .LBB32_25: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB32_11
; VBITS_GE_512-NEXT: .LBB32_26: // %cond.load29
; VBITS_GE_512-NEXT: ld1 { v0.b }[8], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB32_12
; VBITS_GE_512-NEXT: .LBB32_27: // %cond.load33
; VBITS_GE_512-NEXT: ld1 { v0.b }[9], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB32_13
; VBITS_GE_512-NEXT: .LBB32_28: // %cond.load37
; VBITS_GE_512-NEXT: ld1 { v0.b }[10], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB32_14
; VBITS_GE_512-NEXT: .LBB32_29: // %cond.load41
; VBITS_GE_512-NEXT: ld1 { v0.b }[11], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB32_15
; VBITS_GE_512-NEXT: .LBB32_30: // %cond.load45
; VBITS_GE_512-NEXT: ld1 { v0.b }[12], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB32_16
; VBITS_GE_512-NEXT: .LBB32_31: // %cond.load49
; VBITS_GE_512-NEXT: ld1 { v0.b }[13], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB32_17
; VBITS_GE_512-NEXT: .LBB32_32: // %cond.load53
; VBITS_GE_512-NEXT: ld1 { v0.b }[14], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB32_18
; VBITS_GE_512-NEXT: b .LBB32_19
;
; CHECK-EXPAND-LABEL: masked_load_zext_v16i8i32_m32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.b, vl16
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0]
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z1.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.h, z0.b
; CHECK-EXPAND-NEXT: uunpklo z1.h, z1.b
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
; Build the mask: a lane is active when the matching i32 loaded from %bp is 0.
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %b, zeroinitializer
; Expanding load of i8 elements from %ap under %mask; the passthru is poison.
%load = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %ap, <16 x i1> %mask, <16 x i8> poison)
; Widen each loaded byte to i32 with zero extension and store all 16 lanes.
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i8> from %ap, zero-extended to <8 x i64> and stored
; to %c. The mask is the lane-wise result of comparing the <8 x i64> value
; loaded from %bp against zero, and the passthru operand is poison.
;
; Expected code, as recorded by the autogenerated assertions below:
;  * runs without sve2p2: the mask is packed into an 8-bit scalar bitmask in
;    w8 (umov/bfi/orr), each bit is tested with tbz/tbnz, and every selected
;    byte is loaded by its own instruction. With a 256-bit minimum vector
;    length the i64 data is handled as two halves (two ld1d / two st1d); with
;    512 bits and up a single ld1d / st1d is used.
;  * run with sve2p2: no branches; a cntp/whilelo/ld1b/expand sequence is used.
define void @masked_load_zext_v8i8i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB33_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrb w9, [x0], #1
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB33_3
; VBITS_GE_256-NEXT: b .LBB33_4
; VBITS_GE_256-NEXT: .LBB33_2:
; VBITS_GE_256-NEXT: // implicit-def: $d0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB33_4
; VBITS_GE_256-NEXT: .LBB33_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_256-NEXT: .LBB33_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB33_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB33_13
; VBITS_GE_256-NEXT: .LBB33_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB33_14
; VBITS_GE_256-NEXT: .LBB33_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB33_15
; VBITS_GE_256-NEXT: .LBB33_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB33_16
; VBITS_GE_256-NEXT: .LBB33_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB33_11
; VBITS_GE_256-NEXT: .LBB33_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_256-NEXT: .LBB33_11: // %else26
; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB33_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB33_6
; VBITS_GE_256-NEXT: .LBB33_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB33_7
; VBITS_GE_256-NEXT: .LBB33_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB33_8
; VBITS_GE_256-NEXT: .LBB33_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB33_9
; VBITS_GE_256-NEXT: .LBB33_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB33_10
; VBITS_GE_256-NEXT: b .LBB33_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i8i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB33_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrb w9, [x0], #1
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB33_3
; VBITS_GE_512-NEXT: b .LBB33_4
; VBITS_GE_512-NEXT: .LBB33_2:
; VBITS_GE_512-NEXT: // implicit-def: $d0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB33_4
; VBITS_GE_512-NEXT: .LBB33_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.b }[1], [x0], #1
; VBITS_GE_512-NEXT: .LBB33_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB33_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB33_13
; VBITS_GE_512-NEXT: .LBB33_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB33_14
; VBITS_GE_512-NEXT: .LBB33_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB33_15
; VBITS_GE_512-NEXT: .LBB33_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB33_16
; VBITS_GE_512-NEXT: .LBB33_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB33_11
; VBITS_GE_512-NEXT: .LBB33_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.b }[7], [x0]
; VBITS_GE_512-NEXT: .LBB33_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB33_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.b }[2], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB33_6
; VBITS_GE_512-NEXT: .LBB33_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.b }[3], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB33_7
; VBITS_GE_512-NEXT: .LBB33_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.b }[4], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB33_8
; VBITS_GE_512-NEXT: .LBB33_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.b }[5], [x0], #1
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB33_9
; VBITS_GE_512-NEXT: .LBB33_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.b }[6], [x0], #1
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB33_10
; VBITS_GE_512-NEXT: b .LBB33_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i8i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.b, vl8
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: cmpne p2.b, p1/z, z0.b, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.b
; CHECK-EXPAND-NEXT: whilelo p1.b, xzr, x9
; CHECK-EXPAND-NEXT: ld1b { z0.b }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.b, p2, z0.b
; CHECK-EXPAND-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Build the mask: a lane is active when the matching i64 loaded from %bp is 0.
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
; Expanding load of i8 elements from %ap under %mask; the passthru is poison.
%load = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %ap, <8 x i1> %mask, <8 x i8> poison)
; Widen each loaded byte to i64 with zero extension and store all 8 lanes.
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
; Expanding load of <16 x i16> from %ap, zero-extended to <16 x i32> and
; stored to %c. The lane mask is an `icmp eq` against zero of a <16 x i32>
; vector loaded from %bp, so the mask elements are wider than the loaded ones.
; NOTE: despite the "masked_load" in the name, the intrinsic exercised here is
; @llvm.masked.expandload (passthru is poison), not @llvm.masked.load.
; Expected codegen recorded below: the run lines without +sve2p2 check a
; lane-by-lane sequence (a tbz/tbnz test on each of the 16 mask bits guarding
; a scalar halfword load and insert), while the +sve2p2 run line checks one
; cntp / whilelo / ld1h / expand sequence.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define void @masked_load_zext_v16i16i32_m32(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32_m32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: adrp x8, .LCPI34_0
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT: ldr q0, [x8, :lo12:.LCPI34_0]
; VBITS_GE_256-NEXT: and v0.16b, v1.16b, v0.16b
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: zip1 v0.16b, v0.16b, v1.16b
; VBITS_GE_256-NEXT: addv h0, v0.8h
; VBITS_GE_256-NEXT: fmov w9, s0
; VBITS_GE_256-NEXT: fmov w8, s0
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB34_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rh { z0.h }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #2
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB34_3
; VBITS_GE_256-NEXT: b .LBB34_4
; VBITS_GE_256-NEXT: .LBB34_2:
; VBITS_GE_256-NEXT: ptrue p2.h, vl16
; VBITS_GE_256-NEXT: adrp x9, .LCPI34_1
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI34_1
; VBITS_GE_256-NEXT: ld1h { z0.h }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB34_4
; VBITS_GE_256-NEXT: .LBB34_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: .LBB34_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB34_20
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB34_21
; VBITS_GE_256-NEXT: .LBB34_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB34_22
; VBITS_GE_256-NEXT: .LBB34_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB34_23
; VBITS_GE_256-NEXT: .LBB34_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB34_24
; VBITS_GE_256-NEXT: .LBB34_9: // %else22
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB34_25
; VBITS_GE_256-NEXT: .LBB34_10: // %else26
; VBITS_GE_256-NEXT: tbnz w8, #8, .LBB34_26
; VBITS_GE_256-NEXT: .LBB34_11: // %else30
; VBITS_GE_256-NEXT: tbnz w8, #9, .LBB34_27
; VBITS_GE_256-NEXT: .LBB34_12: // %else34
; VBITS_GE_256-NEXT: tbnz w8, #10, .LBB34_28
; VBITS_GE_256-NEXT: .LBB34_13: // %else38
; VBITS_GE_256-NEXT: tbnz w8, #11, .LBB34_29
; VBITS_GE_256-NEXT: .LBB34_14: // %else42
; VBITS_GE_256-NEXT: tbnz w8, #12, .LBB34_30
; VBITS_GE_256-NEXT: .LBB34_15: // %else46
; VBITS_GE_256-NEXT: tbnz w8, #13, .LBB34_31
; VBITS_GE_256-NEXT: .LBB34_16: // %else50
; VBITS_GE_256-NEXT: tbnz w8, #14, .LBB34_32
; VBITS_GE_256-NEXT: .LBB34_17: // %else54
; VBITS_GE_256-NEXT: tbz w8, #15, .LBB34_19
; VBITS_GE_256-NEXT: .LBB34_18: // %cond.load57
; VBITS_GE_256-NEXT: mov w8, #15 // =0xf
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w8
; VBITS_GE_256-NEXT: ldrh w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w8
; VBITS_GE_256-NEXT: .LBB34_19: // %else58
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB34_20: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB34_6
; VBITS_GE_256-NEXT: .LBB34_21: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB34_7
; VBITS_GE_256-NEXT: .LBB34_22: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB34_8
; VBITS_GE_256-NEXT: .LBB34_23: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB34_9
; VBITS_GE_256-NEXT: .LBB34_24: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB34_10
; VBITS_GE_256-NEXT: .LBB34_25: // %cond.load25
; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #8, .LBB34_11
; VBITS_GE_256-NEXT: .LBB34_26: // %cond.load29
; VBITS_GE_256-NEXT: mov w9, #8 // =0x8
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #9, .LBB34_12
; VBITS_GE_256-NEXT: .LBB34_27: // %cond.load33
; VBITS_GE_256-NEXT: mov w9, #9 // =0x9
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #10, .LBB34_13
; VBITS_GE_256-NEXT: .LBB34_28: // %cond.load37
; VBITS_GE_256-NEXT: mov w9, #10 // =0xa
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #11, .LBB34_14
; VBITS_GE_256-NEXT: .LBB34_29: // %cond.load41
; VBITS_GE_256-NEXT: mov w9, #11 // =0xb
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #12, .LBB34_15
; VBITS_GE_256-NEXT: .LBB34_30: // %cond.load45
; VBITS_GE_256-NEXT: mov w9, #12 // =0xc
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #13, .LBB34_16
; VBITS_GE_256-NEXT: .LBB34_31: // %cond.load49
; VBITS_GE_256-NEXT: mov w9, #13 // =0xd
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #14, .LBB34_17
; VBITS_GE_256-NEXT: .LBB34_32: // %cond.load53
; VBITS_GE_256-NEXT: mov w9, #14 // =0xe
; VBITS_GE_256-NEXT: index z1.h, #0, #1
; VBITS_GE_256-NEXT: mov z2.h, w9
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_256-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #15, .LBB34_18
; VBITS_GE_256-NEXT: b .LBB34_19
;
; VBITS_GE_512-LABEL: masked_load_zext_v16i16i32_m32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: umov w11, v0.b[7]
; VBITS_GE_512-NEXT: umov w12, v0.b[8]
; VBITS_GE_512-NEXT: umov w13, v0.b[3]
; VBITS_GE_512-NEXT: umov w14, v0.b[4]
; VBITS_GE_512-NEXT: umov w15, v0.b[10]
; VBITS_GE_512-NEXT: umov w16, v0.b[5]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[9]
; VBITS_GE_512-NEXT: ubfiz w11, w11, #7, #1
; VBITS_GE_512-NEXT: ubfiz w12, w12, #8, #1
; VBITS_GE_512-NEXT: ubfiz w15, w15, #10, #1
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[11]
; VBITS_GE_512-NEXT: orr w11, w11, w12
; VBITS_GE_512-NEXT: umov w12, v0.b[13]
; VBITS_GE_512-NEXT: bfi w8, w13, #3, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[12]
; VBITS_GE_512-NEXT: ubfiz w9, w9, #9, #1
; VBITS_GE_512-NEXT: bfi w8, w14, #4, #1
; VBITS_GE_512-NEXT: umov w14, v0.b[14]
; VBITS_GE_512-NEXT: orr w9, w11, w9
; VBITS_GE_512-NEXT: umov w11, v0.b[6]
; VBITS_GE_512-NEXT: ubfiz w10, w10, #11, #1
; VBITS_GE_512-NEXT: orr w9, w9, w15
; VBITS_GE_512-NEXT: ubfiz w13, w13, #12, #1
; VBITS_GE_512-NEXT: bfi w8, w16, #5, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: ubfiz w10, w12, #13, #1
; VBITS_GE_512-NEXT: orr w9, w9, w13
; VBITS_GE_512-NEXT: ubfiz w12, w14, #14, #1
; VBITS_GE_512-NEXT: umov w13, v0.b[15]
; VBITS_GE_512-NEXT: bfi w8, w11, #6, #1
; VBITS_GE_512-NEXT: orr w9, w9, w10
; VBITS_GE_512-NEXT: orr w9, w9, w12
; VBITS_GE_512-NEXT: orr w8, w8, w9
; VBITS_GE_512-NEXT: orr w9, w8, w13, lsl #15
; VBITS_GE_512-NEXT: and w8, w9, #0xffff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB34_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rh { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #2
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB34_3
; VBITS_GE_512-NEXT: b .LBB34_4
; VBITS_GE_512-NEXT: .LBB34_2:
; VBITS_GE_512-NEXT: ptrue p2.h, vl16
; VBITS_GE_512-NEXT: adrp x9, .LCPI34_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI34_0
; VBITS_GE_512-NEXT: ld1h { z0.h }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB34_4
; VBITS_GE_512-NEXT: .LBB34_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: .LBB34_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB34_20
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB34_21
; VBITS_GE_512-NEXT: .LBB34_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB34_22
; VBITS_GE_512-NEXT: .LBB34_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB34_23
; VBITS_GE_512-NEXT: .LBB34_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB34_24
; VBITS_GE_512-NEXT: .LBB34_9: // %else22
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB34_25
; VBITS_GE_512-NEXT: .LBB34_10: // %else26
; VBITS_GE_512-NEXT: tbnz w8, #8, .LBB34_26
; VBITS_GE_512-NEXT: .LBB34_11: // %else30
; VBITS_GE_512-NEXT: tbnz w8, #9, .LBB34_27
; VBITS_GE_512-NEXT: .LBB34_12: // %else34
; VBITS_GE_512-NEXT: tbnz w8, #10, .LBB34_28
; VBITS_GE_512-NEXT: .LBB34_13: // %else38
; VBITS_GE_512-NEXT: tbnz w8, #11, .LBB34_29
; VBITS_GE_512-NEXT: .LBB34_14: // %else42
; VBITS_GE_512-NEXT: tbnz w8, #12, .LBB34_30
; VBITS_GE_512-NEXT: .LBB34_15: // %else46
; VBITS_GE_512-NEXT: tbnz w8, #13, .LBB34_31
; VBITS_GE_512-NEXT: .LBB34_16: // %else50
; VBITS_GE_512-NEXT: tbnz w8, #14, .LBB34_32
; VBITS_GE_512-NEXT: .LBB34_17: // %else54
; VBITS_GE_512-NEXT: tbz w8, #15, .LBB34_19
; VBITS_GE_512-NEXT: .LBB34_18: // %cond.load57
; VBITS_GE_512-NEXT: mov w8, #15 // =0xf
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w8
; VBITS_GE_512-NEXT: ldrh w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w8
; VBITS_GE_512-NEXT: .LBB34_19: // %else58
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB34_20: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB34_6
; VBITS_GE_512-NEXT: .LBB34_21: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB34_7
; VBITS_GE_512-NEXT: .LBB34_22: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB34_8
; VBITS_GE_512-NEXT: .LBB34_23: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB34_9
; VBITS_GE_512-NEXT: .LBB34_24: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB34_10
; VBITS_GE_512-NEXT: .LBB34_25: // %cond.load25
; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #8, .LBB34_11
; VBITS_GE_512-NEXT: .LBB34_26: // %cond.load29
; VBITS_GE_512-NEXT: mov w9, #8 // =0x8
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #9, .LBB34_12
; VBITS_GE_512-NEXT: .LBB34_27: // %cond.load33
; VBITS_GE_512-NEXT: mov w9, #9 // =0x9
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #10, .LBB34_13
; VBITS_GE_512-NEXT: .LBB34_28: // %cond.load37
; VBITS_GE_512-NEXT: mov w9, #10 // =0xa
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #11, .LBB34_14
; VBITS_GE_512-NEXT: .LBB34_29: // %cond.load41
; VBITS_GE_512-NEXT: mov w9, #11 // =0xb
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #12, .LBB34_15
; VBITS_GE_512-NEXT: .LBB34_30: // %cond.load45
; VBITS_GE_512-NEXT: mov w9, #12 // =0xc
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #13, .LBB34_16
; VBITS_GE_512-NEXT: .LBB34_31: // %cond.load49
; VBITS_GE_512-NEXT: mov w9, #13 // =0xd
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #14, .LBB34_17
; VBITS_GE_512-NEXT: .LBB34_32: // %cond.load53
; VBITS_GE_512-NEXT: mov w9, #14 // =0xe
; VBITS_GE_512-NEXT: index z1.h, #0, #1
; VBITS_GE_512-NEXT: mov z2.h, w9
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: cmpeq p2.h, p1/z, z1.h, z2.h
; VBITS_GE_512-NEXT: mov z0.h, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #15, .LBB34_18
; VBITS_GE_512-NEXT: b .LBB34_19
;
; CHECK-EXPAND-LABEL: masked_load_zext_v16i16i32_m32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: mov x8, #8 // =0x8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1, x8, lsl #2]
; CHECK-EXPAND-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cmpeq p2.s, p0/z, z1.s, #0
; CHECK-EXPAND-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.h, vl16
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-EXPAND-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-EXPAND-NEXT: uzp1 z1.b, z1.b, z1.b
; CHECK-EXPAND-NEXT: mov v1.d[1], v0.d[0]
; CHECK-EXPAND-NEXT: sunpklo z0.h, z1.b
; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h
; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
; CHECK-EXPAND-NEXT: ret
%b = load <16 x i32>, ptr %bp ; mask source: 16 x i32
%mask = icmp eq <16 x i32> %b, zeroinitializer ; lane is active where %b == 0
%load = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %ap, <16 x i1> %mask, <16 x i16> poison) ; inactive lanes are poison
%ext = zext <16 x i16> %load to <16 x i32> ; widen i16 -> i32 (unsigned)
store <16 x i32> %ext, ptr %c
ret void
}
; Expanding load of <8 x i16> from %ap, zero-extended to <8 x i64> and stored
; to %c. The lane mask is an `icmp eq` against zero of an <8 x i64> vector
; loaded from %bp, so each mask element is four times as wide as a loaded one.
; NOTE: despite the "masked_load" in the name, the intrinsic exercised here is
; @llvm.masked.expandload (passthru is poison), not @llvm.masked.load.
; Expected codegen recorded below: the run lines without +sve2p2 check a
; lane-by-lane sequence (a tbz/tbnz test on each of the 8 mask bits guarding
; a single-lane `ld1 { v0.h }[N]`), while the +sve2p2 run line checks one
; cntp / whilelo / ld1h / expand sequence.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define void @masked_load_zext_v8i16i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB35_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ldrh w9, [x0], #2
; VBITS_GE_256-NEXT: fmov s0, w9
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB35_3
; VBITS_GE_256-NEXT: b .LBB35_4
; VBITS_GE_256-NEXT: .LBB35_2:
; VBITS_GE_256-NEXT: // implicit-def: $q0
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB35_4
; VBITS_GE_256-NEXT: .LBB35_3: // %cond.load1
; VBITS_GE_256-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_256-NEXT: .LBB35_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB35_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB35_13
; VBITS_GE_256-NEXT: .LBB35_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB35_14
; VBITS_GE_256-NEXT: .LBB35_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB35_15
; VBITS_GE_256-NEXT: .LBB35_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB35_16
; VBITS_GE_256-NEXT: .LBB35_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB35_11
; VBITS_GE_256-NEXT: .LBB35_10: // %cond.load25
; VBITS_GE_256-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_256-NEXT: .LBB35_11: // %else26
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB35_12: // %cond.load5
; VBITS_GE_256-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB35_6
; VBITS_GE_256-NEXT: .LBB35_13: // %cond.load9
; VBITS_GE_256-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB35_7
; VBITS_GE_256-NEXT: .LBB35_14: // %cond.load13
; VBITS_GE_256-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB35_8
; VBITS_GE_256-NEXT: .LBB35_15: // %cond.load17
; VBITS_GE_256-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB35_9
; VBITS_GE_256-NEXT: .LBB35_16: // %cond.load21
; VBITS_GE_256-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB35_10
; VBITS_GE_256-NEXT: b .LBB35_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i16i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB35_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ldrh w9, [x0], #2
; VBITS_GE_512-NEXT: fmov s0, w9
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB35_3
; VBITS_GE_512-NEXT: b .LBB35_4
; VBITS_GE_512-NEXT: .LBB35_2:
; VBITS_GE_512-NEXT: // implicit-def: $q0
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB35_4
; VBITS_GE_512-NEXT: .LBB35_3: // %cond.load1
; VBITS_GE_512-NEXT: ld1 { v0.h }[1], [x0], #2
; VBITS_GE_512-NEXT: .LBB35_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB35_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB35_13
; VBITS_GE_512-NEXT: .LBB35_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB35_14
; VBITS_GE_512-NEXT: .LBB35_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB35_15
; VBITS_GE_512-NEXT: .LBB35_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB35_16
; VBITS_GE_512-NEXT: .LBB35_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB35_11
; VBITS_GE_512-NEXT: .LBB35_10: // %cond.load25
; VBITS_GE_512-NEXT: ld1 { v0.h }[7], [x0]
; VBITS_GE_512-NEXT: .LBB35_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB35_12: // %cond.load5
; VBITS_GE_512-NEXT: ld1 { v0.h }[2], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB35_6
; VBITS_GE_512-NEXT: .LBB35_13: // %cond.load9
; VBITS_GE_512-NEXT: ld1 { v0.h }[3], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB35_7
; VBITS_GE_512-NEXT: .LBB35_14: // %cond.load13
; VBITS_GE_512-NEXT: ld1 { v0.h }[4], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB35_8
; VBITS_GE_512-NEXT: .LBB35_15: // %cond.load17
; VBITS_GE_512-NEXT: ld1 { v0.h }[5], [x0], #2
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB35_9
; VBITS_GE_512-NEXT: .LBB35_16: // %cond.load21
; VBITS_GE_512-NEXT: ld1 { v0.h }[6], [x0], #2
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB35_10
; VBITS_GE_512-NEXT: b .LBB35_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i16i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.h, vl8
; CHECK-EXPAND-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-EXPAND-NEXT: cmpne p2.h, p1/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.h
; CHECK-EXPAND-NEXT: whilelo p1.h, xzr, x9
; CHECK-EXPAND-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p2, z0.h
; CHECK-EXPAND-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-EXPAND-NEXT: uunpklo z0.s, z0.h
; CHECK-EXPAND-NEXT: uunpklo z1.s, z1.h
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
%b = load <8 x i64>, ptr %bp ; mask source: 8 x i64
%mask = icmp eq <8 x i64> %b, zeroinitializer ; lane is active where %b == 0
%load = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %ap, <8 x i1> %mask, <8 x i16> poison) ; inactive lanes are poison
%ext = zext <8 x i16> %load to <8 x i64> ; widen i16 -> i64 (unsigned)
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v8i32i64_m64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64_m64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB36_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p1/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB36_3
; VBITS_GE_256-NEXT: b .LBB36_4
; VBITS_GE_256-NEXT: .LBB36_2:
; VBITS_GE_256-NEXT: ptrue p2.s, vl8
; VBITS_GE_256-NEXT: adrp x9, .LCPI36_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI36_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p2/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB36_4
; VBITS_GE_256-NEXT: .LBB36_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: .LBB36_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB36_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB36_13
; VBITS_GE_256-NEXT: .LBB36_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB36_14
; VBITS_GE_256-NEXT: .LBB36_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB36_15
; VBITS_GE_256-NEXT: .LBB36_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB36_16
; VBITS_GE_256-NEXT: .LBB36_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB36_11
; VBITS_GE_256-NEXT: .LBB36_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w8
; VBITS_GE_256-NEXT: .LBB36_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB36_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB36_6
; VBITS_GE_256-NEXT: .LBB36_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB36_7
; VBITS_GE_256-NEXT: .LBB36_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB36_8
; VBITS_GE_256-NEXT: .LBB36_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB36_9
; VBITS_GE_256-NEXT: .LBB36_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB36_10
; VBITS_GE_256-NEXT: b .LBB36_11
;
; VBITS_GE_512-LABEL: masked_load_zext_v8i32i64_m64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB36_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB36_3
; VBITS_GE_512-NEXT: b .LBB36_4
; VBITS_GE_512-NEXT: .LBB36_2:
; VBITS_GE_512-NEXT: ptrue p2.s, vl8
; VBITS_GE_512-NEXT: adrp x9, .LCPI36_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI36_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p2/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB36_4
; VBITS_GE_512-NEXT: .LBB36_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: .LBB36_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB36_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB36_13
; VBITS_GE_512-NEXT: .LBB36_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB36_14
; VBITS_GE_512-NEXT: .LBB36_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB36_15
; VBITS_GE_512-NEXT: .LBB36_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB36_16
; VBITS_GE_512-NEXT: .LBB36_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB36_11
; VBITS_GE_512-NEXT: .LBB36_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w8
; VBITS_GE_512-NEXT: .LBB36_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB36_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB36_6
; VBITS_GE_512-NEXT: .LBB36_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB36_7
; VBITS_GE_512-NEXT: .LBB36_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB36_8
; VBITS_GE_512-NEXT: .LBB36_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB36_9
; VBITS_GE_512-NEXT: .LBB36_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p2.s, p1/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p2/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB36_10
; VBITS_GE_512-NEXT: b .LBB36_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_v8i32i64_m64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; CHECK-EXPAND-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cmpeq p2.d, p0/z, z1.d, #0
; CHECK-EXPAND-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-EXPAND-NEXT: ptrue p1.s, vl4
; CHECK-EXPAND-NEXT: uzp1 z3.s, z0.s, z0.s
; CHECK-EXPAND-NEXT: uzp1 z2.s, z1.s, z1.s
; CHECK-EXPAND-NEXT: splice z0.s, p1, { z2.s, z3.s }
; CHECK-EXPAND-NEXT: ptrue p1.s, vl8
; CHECK-EXPAND-NEXT: cmpne p2.s, p1/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x9, p2, p2.s
; CHECK-EXPAND-NEXT: whilelo p1.s, xzr, x9
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p2, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %b, zeroinitializer
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v128i8i16(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v128i8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w9, v1.b[1]
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov z0.b, z1.b[18]
; CHECK-NEXT: umov w10, v1.b[7]
; CHECK-NEXT: umov w11, v1.b[8]
; CHECK-NEXT: mov z2.b, z1.b[19]
; CHECK-NEXT: umov w12, v1.b[2]
; CHECK-NEXT: umov w14, v1.b[9]
; CHECK-NEXT: umov w13, v1.b[3]
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: fmov w16, s0
; CHECK-NEXT: mov z0.b, z1.b[20]
; CHECK-NEXT: bfi x8, x9, #1, #1
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: umov w15, v1.b[10]
; CHECK-NEXT: ubfiz x10, x10, #7, #1
; CHECK-NEXT: ubfiz x11, x11, #8, #1
; CHECK-NEXT: mov z2.b, z1.b[21]
; CHECK-NEXT: bfi x8, x12, #2, #1
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: ubfiz x16, x16, #18, #1
; CHECK-NEXT: ubfiz x9, x9, #19, #1
; CHECK-NEXT: ubfiz x14, x14, #9, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: umov w11, v1.b[11]
; CHECK-NEXT: mov z0.b, z1.b[22]
; CHECK-NEXT: ubfiz x15, x15, #10, #1
; CHECK-NEXT: ubfiz x12, x12, #20, #1
; CHECK-NEXT: orr x9, x16, x9
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: fmov w14, s2
; CHECK-NEXT: bfi x8, x13, #3, #1
; CHECK-NEXT: orr x10, x10, x15
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: umov w12, v1.b[12]
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: ubfiz x11, x11, #11, #1
; CHECK-NEXT: umov w15, v1.b[13]
; CHECK-NEXT: mov z0.b, z1.b[16]
; CHECK-NEXT: ubfiz x14, x14, #21, #1
; CHECK-NEXT: mov z2.b, z1.b[17]
; CHECK-NEXT: umov w16, v1.b[4]
; CHECK-NEXT: ubfiz x13, x13, #22, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: umov w11, v1.b[14]
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: ubfiz x12, x12, #12, #1
; CHECK-NEXT: umov w14, v1.b[5]
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: umov w13, v1.b[15]
; CHECK-NEXT: ubfiz x15, x15, #13, #1
; CHECK-NEXT: orr x10, x10, x12
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: mov z0.b, z1.b[23]
; CHECK-NEXT: ubfiz x11, x11, #14, #1
; CHECK-NEXT: orr x10, x10, x15
; CHECK-NEXT: fmov w15, s2
; CHECK-NEXT: mov z2.b, z1.b[24]
; CHECK-NEXT: bfi x8, x16, #4, #1
; CHECK-NEXT: umov w16, v1.b[6]
; CHECK-NEXT: ubfiz x13, x13, #15, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: fmov w11, s0
; CHECK-NEXT: mov z0.b, z1.b[25]
; CHECK-NEXT: ubfiz x12, x12, #16, #1
; CHECK-NEXT: bfi x8, x14, #5, #1
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: mov z2.b, z1.b[26]
; CHECK-NEXT: ubfiz x11, x11, #23, #1
; CHECK-NEXT: orr x10, x10, x12
; CHECK-NEXT: ubfiz x14, x15, #17, #1
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: mov z0.b, z1.b[27]
; CHECK-NEXT: bfi x8, x16, #6, #1
; CHECK-NEXT: ubfiz x13, x13, #24, #1
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: fmov w11, s2
; CHECK-NEXT: mov z2.b, z1.b[28]
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x12, x12, #25, #1
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: mov z0.b, z1.b[29]
; CHECK-NEXT: ubfiz x11, x11, #26, #1
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: fmov w12, s2
; CHECK-NEXT: mov z2.b, z1.b[30]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x11, x13, #27, #1
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: mov z0.b, z1.b[31]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x12, x12, #28, #1
; CHECK-NEXT: ubfiz x11, x13, #29, #1
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: mov z2.b, z1.b[32]
; CHECK-NEXT: fmov w10, s0
; CHECK-NEXT: mov z0.b, z1.b[33]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x12, x13, #30, #1
; CHECK-NEXT: lsl w10, w10, #31
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[34]
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s0
; CHECK-NEXT: mov z0.b, z1.b[63]
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[62]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB37_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB37_3
; CHECK-NEXT: b .LBB37_4
; CHECK-NEXT: .LBB37_2:
; CHECK-NEXT: adrp x9, .LCPI37_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI37_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB37_4
; CHECK-NEXT: .LBB37_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB37_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB37_181
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB37_182
; CHECK-NEXT: .LBB37_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB37_183
; CHECK-NEXT: .LBB37_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB37_184
; CHECK-NEXT: .LBB37_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB37_185
; CHECK-NEXT: .LBB37_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB37_186
; CHECK-NEXT: .LBB37_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB37_187
; CHECK-NEXT: .LBB37_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB37_188
; CHECK-NEXT: .LBB37_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB37_189
; CHECK-NEXT: .LBB37_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB37_190
; CHECK-NEXT: .LBB37_14: // %else42
; CHECK-NEXT: tbz w8, #12, .LBB37_16
; CHECK-NEXT: .LBB37_15: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB37_16: // %else46
; CHECK-NEXT: mov w12, #71 // =0x47
; CHECK-NEXT: mov w14, #72 // =0x48
; CHECK-NEXT: mov w9, #83 // =0x53
; CHECK-NEXT: mov w10, #84 // =0x54
; CHECK-NEXT: tbz w8, #13, .LBB37_18
; CHECK-NEXT: // %bb.17: // %cond.load49
; CHECK-NEXT: mov w11, #13 // =0xd
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w11
; CHECK-NEXT: .LBB37_18: // %else50
; CHECK-NEXT: mov w11, #73 // =0x49
; CHECK-NEXT: mov w13, #85 // =0x55
; CHECK-NEXT: tbz w8, #14, .LBB37_20
; CHECK-NEXT: // %bb.19: // %cond.load53
; CHECK-NEXT: mov w15, #14 // =0xe
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w15
; CHECK-NEXT: ldrb w15, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w15
; CHECK-NEXT: .LBB37_20: // %else54
; CHECK-NEXT: whilels p3.b, xzr, x12
; CHECK-NEXT: whilels p4.b, xzr, x14
; CHECK-NEXT: mov w14, #86 // =0x56
; CHECK-NEXT: whilels p1.b, xzr, x9
; CHECK-NEXT: mov w9, #74 // =0x4a
; CHECK-NEXT: whilels p2.b, xzr, x10
; CHECK-NEXT: tbz w8, #15, .LBB37_22
; CHECK-NEXT: // %bb.21: // %cond.load57
; CHECK-NEXT: mov w10, #15 // =0xf
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p5/m, w10
; CHECK-NEXT: .LBB37_22: // %else58
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: mov w1, #75 // =0x4b
; CHECK-NEXT: mov w17, #87 // =0x57
; CHECK-NEXT: lastb w12, p4, z1.b
; CHECK-NEXT: lastb w15, p1, z1.b
; CHECK-NEXT: lastb w16, p2, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x11
; CHECK-NEXT: whilels p1.b, xzr, x13
; CHECK-NEXT: tbz w8, #16, .LBB37_24
; CHECK-NEXT: // %bb.23: // %cond.load61
; CHECK-NEXT: mov w11, #16 // =0x10
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w11
; CHECK-NEXT: .LBB37_24: // %else62
; CHECK-NEXT: lastb w11, p2, z1.b
; CHECK-NEXT: mov w3, #76 // =0x4c
; CHECK-NEXT: mov w18, #88 // =0x58
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x9
; CHECK-NEXT: whilels p1.b, xzr, x14
; CHECK-NEXT: tbz w8, #17, .LBB37_26
; CHECK-NEXT: // %bb.25: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w9
; CHECK-NEXT: .LBB37_26: // %else66
; CHECK-NEXT: lastb w9, p2, z1.b
; CHECK-NEXT: ubfiz x5, x10, #7, #1
; CHECK-NEXT: ubfiz x7, x12, #8, #1
; CHECK-NEXT: ubfiz x4, x15, #19, #1
; CHECK-NEXT: ubfiz x6, x16, #20, #1
; CHECK-NEXT: mov w15, #89 // =0x59
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x1
; CHECK-NEXT: mov w1, #77 // =0x4d
; CHECK-NEXT: whilels p1.b, xzr, x17
; CHECK-NEXT: mov w17, #64 // =0x40
; CHECK-NEXT: tbz w8, #18, .LBB37_28
; CHECK-NEXT: // %bb.27: // %cond.load69
; CHECK-NEXT: mov w10, #18 // =0x12
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w10
; CHECK-NEXT: .LBB37_28: // %else70
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: lastb w10, p2, z1.b
; CHECK-NEXT: orr x7, x5, x7
; CHECK-NEXT: ubfiz x5, x13, #21, #1
; CHECK-NEXT: mov w16, #65 // =0x41
; CHECK-NEXT: orr x19, x4, x6
; CHECK-NEXT: mov w4, #90 // =0x5a
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x3
; CHECK-NEXT: ubfiz x3, x11, #9, #1
; CHECK-NEXT: whilels p1.b, xzr, x18
; CHECK-NEXT: mov w18, #78 // =0x4e
; CHECK-NEXT: tbz w8, #19, .LBB37_30
; CHECK-NEXT: // %bb.29: // %cond.load73
; CHECK-NEXT: mov w11, #19 // =0x13
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w11
; CHECK-NEXT: .LBB37_30: // %else74
; CHECK-NEXT: lastb w11, p2, z1.b
; CHECK-NEXT: ubfiz x21, x9, #10, #1
; CHECK-NEXT: ubfiz x6, x14, #22, #1
; CHECK-NEXT: orr x7, x7, x3
; CHECK-NEXT: mov w3, #79 // =0x4f
; CHECK-NEXT: orr x20, x19, x5
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: mov w5, #91 // =0x5b
; CHECK-NEXT: whilels p3.b, xzr, x17
; CHECK-NEXT: mov w17, #66 // =0x42
; CHECK-NEXT: whilels p2.b, xzr, x1
; CHECK-NEXT: whilels p1.b, xzr, x15
; CHECK-NEXT: tbz w8, #20, .LBB37_32
; CHECK-NEXT: // %bb.31: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w9
; CHECK-NEXT: .LBB37_32: // %else78
; CHECK-NEXT: lastb w9, p3, z1.b
; CHECK-NEXT: orr x19, x7, x21
; CHECK-NEXT: ubfiz x21, x10, #11, #1
; CHECK-NEXT: ubfiz x7, x12, #23, #1
; CHECK-NEXT: mov w1, #67 // =0x43
; CHECK-NEXT: orr x22, x20, x6
; CHECK-NEXT: lastb w14, p2, z1.b
; CHECK-NEXT: mov w6, #92 // =0x5c
; CHECK-NEXT: lastb w15, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x16
; CHECK-NEXT: whilels p2.b, xzr, x18
; CHECK-NEXT: whilels p1.b, xzr, x4
; CHECK-NEXT: mov w4, #80 // =0x50
; CHECK-NEXT: tbz w8, #21, .LBB37_34
; CHECK-NEXT: // %bb.33: // %cond.load81
; CHECK-NEXT: mov w10, #21 // =0x15
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w10
; CHECK-NEXT: .LBB37_34: // %else82
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: orr x20, x19, x21
; CHECK-NEXT: ubfiz x21, x11, #12, #1
; CHECK-NEXT: ubfiz x19, x13, #24, #1
; CHECK-NEXT: mov w18, #68 // =0x44
; CHECK-NEXT: orr x23, x22, x7
; CHECK-NEXT: lastb w12, p2, z1.b
; CHECK-NEXT: mov w7, #93 // =0x5d
; CHECK-NEXT: lastb w16, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x17
; CHECK-NEXT: whilels p2.b, xzr, x3
; CHECK-NEXT: whilels p1.b, xzr, x5
; CHECK-NEXT: mov w5, #81 // =0x51
; CHECK-NEXT: tbz w8, #22, .LBB37_36
; CHECK-NEXT: // %bb.35: // %cond.load85
; CHECK-NEXT: mov w11, #22 // =0x16
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w11
; CHECK-NEXT: .LBB37_36: // %else86
; CHECK-NEXT: lastb w11, p3, z1.b
; CHECK-NEXT: orr x20, x20, x21
; CHECK-NEXT: ubfiz x21, x14, #13, #1
; CHECK-NEXT: ubfiz x22, x15, #25, #1
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: mov w3, #69 // =0x45
; CHECK-NEXT: lastb w13, p2, z1.b
; CHECK-NEXT: orr x24, x23, x19
; CHECK-NEXT: mov w19, #94 // =0x5e
; CHECK-NEXT: lastb w17, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x1
; CHECK-NEXT: whilels p2.b, xzr, x4
; CHECK-NEXT: mov w4, #82 // =0x52
; CHECK-NEXT: whilels p1.b, xzr, x6
; CHECK-NEXT: mov w6, #95 // =0x5f
; CHECK-NEXT: tbz w8, #23, .LBB37_38
; CHECK-NEXT: // %bb.37: // %cond.load89
; CHECK-NEXT: mov w14, #23 // =0x17
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w14
; CHECK-NEXT: .LBB37_38: // %else90
; CHECK-NEXT: lastb w14, p3, z1.b
; CHECK-NEXT: bfi x9, x10, #1, #1
; CHECK-NEXT: ubfiz x23, x16, #26, #1
; CHECK-NEXT: lastb w15, p2, z1.b
; CHECK-NEXT: lastb w1, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x18
; CHECK-NEXT: whilels p2.b, xzr, x5
; CHECK-NEXT: ubfiz x5, x12, #14, #1
; CHECK-NEXT: mov w12, #70 // =0x46
; CHECK-NEXT: whilels p1.b, xzr, x7
; CHECK-NEXT: orr x7, x20, x21
; CHECK-NEXT: orr x20, x24, x22
; CHECK-NEXT: tbz w8, #24, .LBB37_40
; CHECK-NEXT: // %bb.39: // %cond.load93
; CHECK-NEXT: mov w10, #24 // =0x18
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w10
; CHECK-NEXT: .LBB37_40: // %else94
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: bfi x9, x11, #2, #1
; CHECK-NEXT: orr x5, x7, x5
; CHECK-NEXT: lastb w16, p2, z1.b
; CHECK-NEXT: lastb w18, p1, z1.b
; CHECK-NEXT: whilels p4.b, xzr, x3
; CHECK-NEXT: ubfiz x3, x13, #15, #1
; CHECK-NEXT: whilels p2.b, xzr, x4
; CHECK-NEXT: ubfiz x4, x17, #27, #1
; CHECK-NEXT: whilels p1.b, xzr, x19
; CHECK-NEXT: whilels p3.b, xzr, x6
; CHECK-NEXT: orr x6, x20, x23
; CHECK-NEXT: tbz w8, #25, .LBB37_42
; CHECK-NEXT: // %bb.41: // %cond.load97
; CHECK-NEXT: mov w11, #25 // =0x19
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p5/m, w11
; CHECK-NEXT: .LBB37_42: // %else98
; CHECK-NEXT: lastb w11, p4, z1.b
; CHECK-NEXT: bfi x9, x14, #3, #1
; CHECK-NEXT: ubfiz x15, x15, #16, #1
; CHECK-NEXT: ubfiz x1, x1, #28, #1
; CHECK-NEXT: orr x3, x5, x3
; CHECK-NEXT: orr x4, x6, x4
; CHECK-NEXT: lastb w13, p2, z1.b
; CHECK-NEXT: mov w14, #96 // =0x60
; CHECK-NEXT: lastb w17, p1, z1.b
; CHECK-NEXT: lastb w7, p3, z1.b
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: tbz w8, #26, .LBB37_44
; CHECK-NEXT: // %bb.43: // %cond.load101
; CHECK-NEXT: mov w12, #26 // =0x1a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w12
; CHECK-NEXT: .LBB37_44: // %else102
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: bfi x9, x10, #4, #1
; CHECK-NEXT: ubfiz x16, x16, #17, #1
; CHECK-NEXT: ubfiz x18, x18, #29, #1
; CHECK-NEXT: lsl w10, w7, #31
; CHECK-NEXT: orr x3, x3, x15
; CHECK-NEXT: orr x1, x4, x1
; CHECK-NEXT: mov w15, #97 // =0x61
; CHECK-NEXT: tbz w8, #27, .LBB37_46
; CHECK-NEXT: // %bb.45: // %cond.load105
; CHECK-NEXT: mov w4, #27 // =0x1b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w4
; CHECK-NEXT: ldrb w4, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w4
; CHECK-NEXT: .LBB37_46: // %else106
; CHECK-NEXT: bfi x9, x11, #5, #1
; CHECK-NEXT: ubfiz x13, x13, #18, #1
; CHECK-NEXT: ubfiz x17, x17, #30, #1
; CHECK-NEXT: whilels p1.b, xzr, x14
; CHECK-NEXT: orr x16, x3, x16
; CHECK-NEXT: orr x18, x1, x18
; CHECK-NEXT: mov w11, #98 // =0x62
; CHECK-NEXT: tbz w8, #28, .LBB37_48
; CHECK-NEXT: // %bb.47: // %cond.load109
; CHECK-NEXT: mov w14, #28 // =0x1c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_48: // %else110
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: bfi x9, x12, #6, #1
; CHECK-NEXT: mov w12, #99 // =0x63
; CHECK-NEXT: whilels p1.b, xzr, x15
; CHECK-NEXT: orr x15, x16, x13
; CHECK-NEXT: orr x16, x18, x17
; CHECK-NEXT: tbz w8, #29, .LBB37_50
; CHECK-NEXT: // %bb.49: // %cond.load113
; CHECK-NEXT: mov w13, #29 // =0x1d
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_50: // %else114
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x9, x15
; CHECK-NEXT: orr x10, x16, x10
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: tbz w8, #30, .LBB37_52
; CHECK-NEXT: // %bb.51: // %cond.load117
; CHECK-NEXT: mov w14, #30 // =0x1e
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_52: // %else118
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #101 // =0x65
; CHECK-NEXT: tbz w8, #31, .LBB37_54
; CHECK-NEXT: // %bb.53: // %cond.load121
; CHECK-NEXT: mov w13, #31 // =0x1f
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_54: // %else122
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #32
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #102 // =0x66
; CHECK-NEXT: tbz x8, #32, .LBB37_56
; CHECK-NEXT: // %bb.55: // %cond.load125
; CHECK-NEXT: mov w14, #32 // =0x20
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_56: // %else126
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #33
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #103 // =0x67
; CHECK-NEXT: tbz x8, #33, .LBB37_58
; CHECK-NEXT: // %bb.57: // %cond.load129
; CHECK-NEXT: mov w13, #33 // =0x21
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_58: // %else130
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #34
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #104 // =0x68
; CHECK-NEXT: tbz x8, #34, .LBB37_60
; CHECK-NEXT: // %bb.59: // %cond.load133
; CHECK-NEXT: mov w14, #34 // =0x22
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_60: // %else134
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #35
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #105 // =0x69
; CHECK-NEXT: tbz x8, #35, .LBB37_62
; CHECK-NEXT: // %bb.61: // %cond.load137
; CHECK-NEXT: mov w13, #35 // =0x23
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_62: // %else138
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #36
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #106 // =0x6a
; CHECK-NEXT: tbz x8, #36, .LBB37_64
; CHECK-NEXT: // %bb.63: // %cond.load141
; CHECK-NEXT: mov w14, #36 // =0x24
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_64: // %else142
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #37
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #107 // =0x6b
; CHECK-NEXT: tbz x8, #37, .LBB37_66
; CHECK-NEXT: // %bb.65: // %cond.load145
; CHECK-NEXT: mov w13, #37 // =0x25
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_66: // %else146
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #38
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #108 // =0x6c
; CHECK-NEXT: tbz x8, #38, .LBB37_68
; CHECK-NEXT: // %bb.67: // %cond.load149
; CHECK-NEXT: mov w14, #38 // =0x26
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_68: // %else150
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #39
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #109 // =0x6d
; CHECK-NEXT: tbz x8, #39, .LBB37_70
; CHECK-NEXT: // %bb.69: // %cond.load153
; CHECK-NEXT: mov w13, #39 // =0x27
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_70: // %else154
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #40
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #110 // =0x6e
; CHECK-NEXT: tbz x8, #40, .LBB37_72
; CHECK-NEXT: // %bb.71: // %cond.load157
; CHECK-NEXT: mov w14, #40 // =0x28
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_72: // %else158
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #41
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #111 // =0x6f
; CHECK-NEXT: tbz x8, #41, .LBB37_74
; CHECK-NEXT: // %bb.73: // %cond.load161
; CHECK-NEXT: mov w13, #41 // =0x29
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_74: // %else162
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #42
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #112 // =0x70
; CHECK-NEXT: tbz x8, #42, .LBB37_76
; CHECK-NEXT: // %bb.75: // %cond.load165
; CHECK-NEXT: mov w14, #42 // =0x2a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_76: // %else166
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #43
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #113 // =0x71
; CHECK-NEXT: tbz x8, #43, .LBB37_78
; CHECK-NEXT: // %bb.77: // %cond.load169
; CHECK-NEXT: mov w13, #43 // =0x2b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_78: // %else170
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #44
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #114 // =0x72
; CHECK-NEXT: tbz x8, #44, .LBB37_80
; CHECK-NEXT: // %bb.79: // %cond.load173
; CHECK-NEXT: mov w14, #44 // =0x2c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_80: // %else174
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #45
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #115 // =0x73
; CHECK-NEXT: tbz x8, #45, .LBB37_82
; CHECK-NEXT: // %bb.81: // %cond.load177
; CHECK-NEXT: mov w13, #45 // =0x2d
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_82: // %else178
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #46
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #116 // =0x74
; CHECK-NEXT: tbz x8, #46, .LBB37_84
; CHECK-NEXT: // %bb.83: // %cond.load181
; CHECK-NEXT: mov w14, #46 // =0x2e
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_84: // %else182
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #47
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #117 // =0x75
; CHECK-NEXT: tbz x8, #47, .LBB37_86
; CHECK-NEXT: // %bb.85: // %cond.load185
; CHECK-NEXT: mov w13, #47 // =0x2f
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_86: // %else186
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #48
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #118 // =0x76
; CHECK-NEXT: tbz x8, #48, .LBB37_88
; CHECK-NEXT: // %bb.87: // %cond.load189
; CHECK-NEXT: mov w14, #48 // =0x30
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_88: // %else190
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #49
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #119 // =0x77
; CHECK-NEXT: tbz x8, #49, .LBB37_90
; CHECK-NEXT: // %bb.89: // %cond.load193
; CHECK-NEXT: mov w13, #49 // =0x31
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_90: // %else194
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #50
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #120 // =0x78
; CHECK-NEXT: tbz x8, #50, .LBB37_92
; CHECK-NEXT: // %bb.91: // %cond.load197
; CHECK-NEXT: mov w14, #50 // =0x32
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_92: // %else198
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #51
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #121 // =0x79
; CHECK-NEXT: tbz x8, #51, .LBB37_94
; CHECK-NEXT: // %bb.93: // %cond.load201
; CHECK-NEXT: mov w13, #51 // =0x33
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_94: // %else202
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #52
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #122 // =0x7a
; CHECK-NEXT: tbz x8, #52, .LBB37_96
; CHECK-NEXT: // %bb.95: // %cond.load205
; CHECK-NEXT: mov w14, #52 // =0x34
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_96: // %else206
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #53
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #123 // =0x7b
; CHECK-NEXT: tbz x8, #53, .LBB37_98
; CHECK-NEXT: // %bb.97: // %cond.load209
; CHECK-NEXT: mov w13, #53 // =0x35
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_98: // %else210
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #54
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #124 // =0x7c
; CHECK-NEXT: tbz x8, #54, .LBB37_100
; CHECK-NEXT: // %bb.99: // %cond.load213
; CHECK-NEXT: mov w14, #54 // =0x36
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_100: // %else214
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #55
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #125 // =0x7d
; CHECK-NEXT: tbz x8, #55, .LBB37_102
; CHECK-NEXT: // %bb.101: // %cond.load217
; CHECK-NEXT: mov w13, #55 // =0x37
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB37_102: // %else218
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #56
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #126 // =0x7e
; CHECK-NEXT: tbz x8, #56, .LBB37_104
; CHECK-NEXT: // %bb.103: // %cond.load221
; CHECK-NEXT: mov w14, #56 // =0x38
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_104: // %else222
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #57
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: tbz x8, #57, .LBB37_106
; CHECK-NEXT: // %bb.105: // %cond.load225
; CHECK-NEXT: mov w12, #57 // =0x39
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w12
; CHECK-NEXT: .LBB37_106: // %else226
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: orr x13, x15, x9, lsl #58
; CHECK-NEXT: mov w9, #127 // =0x7f
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: and w11, w14, #0x1
; CHECK-NEXT: tbz x8, #58, .LBB37_108
; CHECK-NEXT: // %bb.107: // %cond.load229
; CHECK-NEXT: mov w14, #58 // =0x3a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB37_108: // %else230
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x13, x13, x10, lsl #59
; CHECK-NEXT: and w10, w12, #0x1
; CHECK-NEXT: tbz x8, #59, .LBB37_110
; CHECK-NEXT: // %bb.109: // %cond.load233
; CHECK-NEXT: mov w12, #59 // =0x3b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w12
; CHECK-NEXT: .LBB37_110: // %else234
; CHECK-NEXT: orr x12, x13, x11, lsl #60
; CHECK-NEXT: whilels p1.b, xzr, x9
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: tbz x8, #60, .LBB37_112
; CHECK-NEXT: // %bb.111: // %cond.load237
; CHECK-NEXT: mov w11, #60 // =0x3c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w11
; CHECK-NEXT: .LBB37_112: // %else238
; CHECK-NEXT: lastb w11, p1, z1.b
; CHECK-NEXT: orr x10, x12, x10, lsl #61
; CHECK-NEXT: tbnz x8, #61, .LBB37_191
; CHECK-NEXT: // %bb.113: // %else242
; CHECK-NEXT: orr x9, x10, x9, lsl #62
; CHECK-NEXT: tbnz x8, #62, .LBB37_192
; CHECK-NEXT: .LBB37_114: // %else246
; CHECK-NEXT: orr x9, x9, x11, lsl #63
; CHECK-NEXT: tbnz x8, #63, .LBB37_193
; CHECK-NEXT: .LBB37_115: // %else250
; CHECK-NEXT: tbnz w9, #0, .LBB37_194
; CHECK-NEXT: .LBB37_116: // %else254
; CHECK-NEXT: tbnz w9, #1, .LBB37_195
; CHECK-NEXT: .LBB37_117: // %else258
; CHECK-NEXT: tbnz w9, #2, .LBB37_196
; CHECK-NEXT: .LBB37_118: // %else262
; CHECK-NEXT: tbnz w9, #3, .LBB37_197
; CHECK-NEXT: .LBB37_119: // %else266
; CHECK-NEXT: tbnz w9, #4, .LBB37_198
; CHECK-NEXT: .LBB37_120: // %else270
; CHECK-NEXT: tbnz w9, #5, .LBB37_199
; CHECK-NEXT: .LBB37_121: // %else274
; CHECK-NEXT: tbnz w9, #6, .LBB37_200
; CHECK-NEXT: .LBB37_122: // %else278
; CHECK-NEXT: tbnz w9, #7, .LBB37_201
; CHECK-NEXT: .LBB37_123: // %else282
; CHECK-NEXT: tbnz w9, #8, .LBB37_202
; CHECK-NEXT: .LBB37_124: // %else286
; CHECK-NEXT: tbnz w9, #9, .LBB37_203
; CHECK-NEXT: .LBB37_125: // %else290
; CHECK-NEXT: tbnz w9, #10, .LBB37_204
; CHECK-NEXT: .LBB37_126: // %else294
; CHECK-NEXT: tbnz w9, #11, .LBB37_205
; CHECK-NEXT: .LBB37_127: // %else298
; CHECK-NEXT: tbnz w9, #12, .LBB37_206
; CHECK-NEXT: .LBB37_128: // %else302
; CHECK-NEXT: tbnz w9, #13, .LBB37_207
; CHECK-NEXT: .LBB37_129: // %else306
; CHECK-NEXT: tbnz w9, #14, .LBB37_208
; CHECK-NEXT: .LBB37_130: // %else310
; CHECK-NEXT: tbnz w9, #15, .LBB37_209
; CHECK-NEXT: .LBB37_131: // %else314
; CHECK-NEXT: tbnz w9, #16, .LBB37_210
; CHECK-NEXT: .LBB37_132: // %else318
; CHECK-NEXT: tbnz w9, #17, .LBB37_211
; CHECK-NEXT: .LBB37_133: // %else322
; CHECK-NEXT: tbnz w9, #18, .LBB37_212
; CHECK-NEXT: .LBB37_134: // %else326
; CHECK-NEXT: tbnz w9, #19, .LBB37_213
; CHECK-NEXT: .LBB37_135: // %else330
; CHECK-NEXT: tbnz w9, #20, .LBB37_214
; CHECK-NEXT: .LBB37_136: // %else334
; CHECK-NEXT: tbnz w9, #21, .LBB37_215
; CHECK-NEXT: .LBB37_137: // %else338
; CHECK-NEXT: tbnz w9, #22, .LBB37_216
; CHECK-NEXT: .LBB37_138: // %else342
; CHECK-NEXT: tbnz w9, #23, .LBB37_217
; CHECK-NEXT: .LBB37_139: // %else346
; CHECK-NEXT: tbnz w9, #24, .LBB37_218
; CHECK-NEXT: .LBB37_140: // %else350
; CHECK-NEXT: tbnz w9, #25, .LBB37_219
; CHECK-NEXT: .LBB37_141: // %else354
; CHECK-NEXT: tbnz w9, #26, .LBB37_220
; CHECK-NEXT: .LBB37_142: // %else358
; CHECK-NEXT: tbnz w9, #27, .LBB37_221
; CHECK-NEXT: .LBB37_143: // %else362
; CHECK-NEXT: tbnz w9, #28, .LBB37_222
; CHECK-NEXT: .LBB37_144: // %else366
; CHECK-NEXT: tbnz w9, #29, .LBB37_223
; CHECK-NEXT: .LBB37_145: // %else370
; CHECK-NEXT: tbnz w9, #30, .LBB37_224
; CHECK-NEXT: .LBB37_146: // %else374
; CHECK-NEXT: tbnz w9, #31, .LBB37_225
; CHECK-NEXT: .LBB37_147: // %else378
; CHECK-NEXT: tbnz x9, #32, .LBB37_226
; CHECK-NEXT: .LBB37_148: // %else382
; CHECK-NEXT: tbnz x9, #33, .LBB37_227
; CHECK-NEXT: .LBB37_149: // %else386
; CHECK-NEXT: tbnz x9, #34, .LBB37_228
; CHECK-NEXT: .LBB37_150: // %else390
; CHECK-NEXT: tbnz x9, #35, .LBB37_229
; CHECK-NEXT: .LBB37_151: // %else394
; CHECK-NEXT: tbnz x9, #36, .LBB37_230
; CHECK-NEXT: .LBB37_152: // %else398
; CHECK-NEXT: tbnz x9, #37, .LBB37_231
; CHECK-NEXT: .LBB37_153: // %else402
; CHECK-NEXT: tbnz x9, #38, .LBB37_232
; CHECK-NEXT: .LBB37_154: // %else406
; CHECK-NEXT: tbnz x9, #39, .LBB37_233
; CHECK-NEXT: .LBB37_155: // %else410
; CHECK-NEXT: tbnz x9, #40, .LBB37_234
; CHECK-NEXT: .LBB37_156: // %else414
; CHECK-NEXT: tbnz x9, #41, .LBB37_235
; CHECK-NEXT: .LBB37_157: // %else418
; CHECK-NEXT: tbnz x9, #42, .LBB37_236
; CHECK-NEXT: .LBB37_158: // %else422
; CHECK-NEXT: tbnz x9, #43, .LBB37_237
; CHECK-NEXT: .LBB37_159: // %else426
; CHECK-NEXT: tbnz x9, #44, .LBB37_238
; CHECK-NEXT: .LBB37_160: // %else430
; CHECK-NEXT: tbnz x9, #45, .LBB37_239
; CHECK-NEXT: .LBB37_161: // %else434
; CHECK-NEXT: tbnz x9, #46, .LBB37_240
; CHECK-NEXT: .LBB37_162: // %else438
; CHECK-NEXT: tbnz x9, #47, .LBB37_241
; CHECK-NEXT: .LBB37_163: // %else442
; CHECK-NEXT: tbnz x9, #48, .LBB37_242
; CHECK-NEXT: .LBB37_164: // %else446
; CHECK-NEXT: tbnz x9, #49, .LBB37_243
; CHECK-NEXT: .LBB37_165: // %else450
; CHECK-NEXT: tbnz x9, #50, .LBB37_244
; CHECK-NEXT: .LBB37_166: // %else454
; CHECK-NEXT: tbnz x9, #51, .LBB37_245
; CHECK-NEXT: .LBB37_167: // %else458
; CHECK-NEXT: tbnz x9, #52, .LBB37_246
; CHECK-NEXT: .LBB37_168: // %else462
; CHECK-NEXT: tbnz x9, #53, .LBB37_247
; CHECK-NEXT: .LBB37_169: // %else466
; CHECK-NEXT: tbnz x9, #54, .LBB37_248
; CHECK-NEXT: .LBB37_170: // %else470
; CHECK-NEXT: tbnz x9, #55, .LBB37_249
; CHECK-NEXT: .LBB37_171: // %else474
; CHECK-NEXT: tbnz x9, #56, .LBB37_250
; CHECK-NEXT: .LBB37_172: // %else478
; CHECK-NEXT: tbnz x9, #57, .LBB37_251
; CHECK-NEXT: .LBB37_173: // %else482
; CHECK-NEXT: tbnz x9, #58, .LBB37_252
; CHECK-NEXT: .LBB37_174: // %else486
; CHECK-NEXT: tbnz x9, #59, .LBB37_253
; CHECK-NEXT: .LBB37_175: // %else490
; CHECK-NEXT: tbnz x9, #60, .LBB37_254
; CHECK-NEXT: .LBB37_176: // %else494
; CHECK-NEXT: tbnz x9, #61, .LBB37_255
; CHECK-NEXT: .LBB37_177: // %else498
; CHECK-NEXT: tbnz x9, #62, .LBB37_256
; CHECK-NEXT: .LBB37_178: // %else502
; CHECK-NEXT: tbz x9, #63, .LBB37_180
; CHECK-NEXT: .LBB37_179: // %cond.load505
; CHECK-NEXT: mov w8, #127 // =0x7f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB37_180: // %else506
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB37_181: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB37_6
; CHECK-NEXT: .LBB37_182: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB37_7
; CHECK-NEXT: .LBB37_183: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB37_8
; CHECK-NEXT: .LBB37_184: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB37_9
; CHECK-NEXT: .LBB37_185: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB37_10
; CHECK-NEXT: .LBB37_186: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB37_11
; CHECK-NEXT: .LBB37_187: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB37_12
; CHECK-NEXT: .LBB37_188: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB37_13
; CHECK-NEXT: .LBB37_189: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB37_14
; CHECK-NEXT: .LBB37_190: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz w8, #12, .LBB37_15
; CHECK-NEXT: b .LBB37_16
; CHECK-NEXT: .LBB37_191: // %cond.load241
; CHECK-NEXT: mov w12, #61 // =0x3d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w12
; CHECK-NEXT: orr x9, x10, x9, lsl #62
; CHECK-NEXT: tbz x8, #62, .LBB37_114
; CHECK-NEXT: .LBB37_192: // %cond.load245
; CHECK-NEXT: mov w10, #62 // =0x3e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w10
; CHECK-NEXT: orr x9, x9, x11, lsl #63
; CHECK-NEXT: tbz x8, #63, .LBB37_115
; CHECK-NEXT: .LBB37_193: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #0, .LBB37_116
; CHECK-NEXT: .LBB37_194: // %cond.load253
; CHECK-NEXT: mov w8, #64 // =0x40
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #1, .LBB37_117
; CHECK-NEXT: .LBB37_195: // %cond.load257
; CHECK-NEXT: mov w8, #65 // =0x41
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #2, .LBB37_118
; CHECK-NEXT: .LBB37_196: // %cond.load261
; CHECK-NEXT: mov w8, #66 // =0x42
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #3, .LBB37_119
; CHECK-NEXT: .LBB37_197: // %cond.load265
; CHECK-NEXT: mov w8, #67 // =0x43
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #4, .LBB37_120
; CHECK-NEXT: .LBB37_198: // %cond.load269
; CHECK-NEXT: mov w8, #68 // =0x44
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #5, .LBB37_121
; CHECK-NEXT: .LBB37_199: // %cond.load273
; CHECK-NEXT: mov w8, #69 // =0x45
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #6, .LBB37_122
; CHECK-NEXT: .LBB37_200: // %cond.load277
; CHECK-NEXT: mov w8, #70 // =0x46
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #7, .LBB37_123
; CHECK-NEXT: .LBB37_201: // %cond.load281
; CHECK-NEXT: mov w8, #71 // =0x47
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #8, .LBB37_124
; CHECK-NEXT: .LBB37_202: // %cond.load285
; CHECK-NEXT: mov w8, #72 // =0x48
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #9, .LBB37_125
; CHECK-NEXT: .LBB37_203: // %cond.load289
; CHECK-NEXT: mov w8, #73 // =0x49
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #10, .LBB37_126
; CHECK-NEXT: .LBB37_204: // %cond.load293
; CHECK-NEXT: mov w8, #74 // =0x4a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #11, .LBB37_127
; CHECK-NEXT: .LBB37_205: // %cond.load297
; CHECK-NEXT: mov w8, #75 // =0x4b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #12, .LBB37_128
; CHECK-NEXT: .LBB37_206: // %cond.load301
; CHECK-NEXT: mov w8, #76 // =0x4c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #13, .LBB37_129
; CHECK-NEXT: .LBB37_207: // %cond.load305
; CHECK-NEXT: mov w8, #77 // =0x4d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #14, .LBB37_130
; CHECK-NEXT: .LBB37_208: // %cond.load309
; CHECK-NEXT: mov w8, #78 // =0x4e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #15, .LBB37_131
; CHECK-NEXT: .LBB37_209: // %cond.load313
; CHECK-NEXT: mov w8, #79 // =0x4f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #16, .LBB37_132
; CHECK-NEXT: .LBB37_210: // %cond.load317
; CHECK-NEXT: mov w8, #80 // =0x50
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #17, .LBB37_133
; CHECK-NEXT: .LBB37_211: // %cond.load321
; CHECK-NEXT: mov w8, #81 // =0x51
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #18, .LBB37_134
; CHECK-NEXT: .LBB37_212: // %cond.load325
; CHECK-NEXT: mov w8, #82 // =0x52
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #19, .LBB37_135
; CHECK-NEXT: .LBB37_213: // %cond.load329
; CHECK-NEXT: mov w8, #83 // =0x53
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #20, .LBB37_136
; CHECK-NEXT: .LBB37_214: // %cond.load333
; CHECK-NEXT: mov w8, #84 // =0x54
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #21, .LBB37_137
; CHECK-NEXT: .LBB37_215: // %cond.load337
; CHECK-NEXT: mov w8, #85 // =0x55
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #22, .LBB37_138
; CHECK-NEXT: .LBB37_216: // %cond.load341
; CHECK-NEXT: mov w8, #86 // =0x56
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #23, .LBB37_139
; CHECK-NEXT: .LBB37_217: // %cond.load345
; CHECK-NEXT: mov w8, #87 // =0x57
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #24, .LBB37_140
; CHECK-NEXT: .LBB37_218: // %cond.load349
; CHECK-NEXT: mov w8, #88 // =0x58
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #25, .LBB37_141
; CHECK-NEXT: .LBB37_219: // %cond.load353
; CHECK-NEXT: mov w8, #89 // =0x59
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #26, .LBB37_142
; CHECK-NEXT: .LBB37_220: // %cond.load357
; CHECK-NEXT: mov w8, #90 // =0x5a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #27, .LBB37_143
; CHECK-NEXT: .LBB37_221: // %cond.load361
; CHECK-NEXT: mov w8, #91 // =0x5b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #28, .LBB37_144
; CHECK-NEXT: .LBB37_222: // %cond.load365
; CHECK-NEXT: mov w8, #92 // =0x5c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #29, .LBB37_145
; CHECK-NEXT: .LBB37_223: // %cond.load369
; CHECK-NEXT: mov w8, #93 // =0x5d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #30, .LBB37_146
; CHECK-NEXT: .LBB37_224: // %cond.load373
; CHECK-NEXT: mov w8, #94 // =0x5e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #31, .LBB37_147
; CHECK-NEXT: .LBB37_225: // %cond.load377
; CHECK-NEXT: mov w8, #95 // =0x5f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #32, .LBB37_148
; CHECK-NEXT: .LBB37_226: // %cond.load381
; CHECK-NEXT: mov w8, #96 // =0x60
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #33, .LBB37_149
; CHECK-NEXT: .LBB37_227: // %cond.load385
; CHECK-NEXT: mov w8, #97 // =0x61
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #34, .LBB37_150
; CHECK-NEXT: .LBB37_228: // %cond.load389
; CHECK-NEXT: mov w8, #98 // =0x62
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #35, .LBB37_151
; CHECK-NEXT: .LBB37_229: // %cond.load393
; CHECK-NEXT: mov w8, #99 // =0x63
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #36, .LBB37_152
; CHECK-NEXT: .LBB37_230: // %cond.load397
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #37, .LBB37_153
; CHECK-NEXT: .LBB37_231: // %cond.load401
; CHECK-NEXT: mov w8, #101 // =0x65
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #38, .LBB37_154
; CHECK-NEXT: .LBB37_232: // %cond.load405
; CHECK-NEXT: mov w8, #102 // =0x66
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #39, .LBB37_155
; CHECK-NEXT: .LBB37_233: // %cond.load409
; CHECK-NEXT: mov w8, #103 // =0x67
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #40, .LBB37_156
; CHECK-NEXT: .LBB37_234: // %cond.load413
; CHECK-NEXT: mov w8, #104 // =0x68
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #41, .LBB37_157
; CHECK-NEXT: .LBB37_235: // %cond.load417
; CHECK-NEXT: mov w8, #105 // =0x69
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #42, .LBB37_158
; CHECK-NEXT: .LBB37_236: // %cond.load421
; CHECK-NEXT: mov w8, #106 // =0x6a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #43, .LBB37_159
; CHECK-NEXT: .LBB37_237: // %cond.load425
; CHECK-NEXT: mov w8, #107 // =0x6b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #44, .LBB37_160
; CHECK-NEXT: .LBB37_238: // %cond.load429
; CHECK-NEXT: mov w8, #108 // =0x6c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #45, .LBB37_161
; CHECK-NEXT: .LBB37_239: // %cond.load433
; CHECK-NEXT: mov w8, #109 // =0x6d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #46, .LBB37_162
; CHECK-NEXT: .LBB37_240: // %cond.load437
; CHECK-NEXT: mov w8, #110 // =0x6e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #47, .LBB37_163
; CHECK-NEXT: .LBB37_241: // %cond.load441
; CHECK-NEXT: mov w8, #111 // =0x6f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #48, .LBB37_164
; CHECK-NEXT: .LBB37_242: // %cond.load445
; CHECK-NEXT: mov w8, #112 // =0x70
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #49, .LBB37_165
; CHECK-NEXT: .LBB37_243: // %cond.load449
; CHECK-NEXT: mov w8, #113 // =0x71
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #50, .LBB37_166
; CHECK-NEXT: .LBB37_244: // %cond.load453
; CHECK-NEXT: mov w8, #114 // =0x72
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #51, .LBB37_167
; CHECK-NEXT: .LBB37_245: // %cond.load457
; CHECK-NEXT: mov w8, #115 // =0x73
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #52, .LBB37_168
; CHECK-NEXT: .LBB37_246: // %cond.load461
; CHECK-NEXT: mov w8, #116 // =0x74
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #53, .LBB37_169
; CHECK-NEXT: .LBB37_247: // %cond.load465
; CHECK-NEXT: mov w8, #117 // =0x75
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #54, .LBB37_170
; CHECK-NEXT: .LBB37_248: // %cond.load469
; CHECK-NEXT: mov w8, #118 // =0x76
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #55, .LBB37_171
; CHECK-NEXT: .LBB37_249: // %cond.load473
; CHECK-NEXT: mov w8, #119 // =0x77
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #56, .LBB37_172
; CHECK-NEXT: .LBB37_250: // %cond.load477
; CHECK-NEXT: mov w8, #120 // =0x78
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #57, .LBB37_173
; CHECK-NEXT: .LBB37_251: // %cond.load481
; CHECK-NEXT: mov w8, #121 // =0x79
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #58, .LBB37_174
; CHECK-NEXT: .LBB37_252: // %cond.load485
; CHECK-NEXT: mov w8, #122 // =0x7a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #59, .LBB37_175
; CHECK-NEXT: .LBB37_253: // %cond.load489
; CHECK-NEXT: mov w8, #123 // =0x7b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #60, .LBB37_176
; CHECK-NEXT: .LBB37_254: // %cond.load493
; CHECK-NEXT: mov w8, #124 // =0x7c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #61, .LBB37_177
; CHECK-NEXT: .LBB37_255: // %cond.load497
; CHECK-NEXT: mov w8, #125 // =0x7d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #62, .LBB37_178
; CHECK-NEXT: .LBB37_256: // %cond.load501
; CHECK-NEXT: mov w8, #126 // =0x7e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbnz x9, #63, .LBB37_179
; CHECK-NEXT: b .LBB37_180
;
; CHECK-EXPAND-LABEL: masked_load_sext_v128i8i16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl128
; CHECK-EXPAND-NEXT: ld1b { z0.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p2.h, xzr, x8
; CHECK-EXPAND-NEXT: ld1sb { z0.h }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <128 x i8>, ptr %bp
%mask = icmp eq <128 x i8> %b, zeroinitializer
%load = call <128 x i8> @llvm.masked.expandload.v128i8(ptr %ap, <128 x i1> %mask, <128 x i8> poison)
%ext = sext <128 x i8> %load to <128 x i16>
store <128 x i16> %ext, ptr %c
ret void
}
define void @masked_load_sext_v64i8i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v64i8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.b, vl64
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w11, v0.b[1]
; CHECK-NEXT: fmov w22, s0
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: umov w13, v0.b[3]
; CHECK-NEXT: umov w14, v0.b[7]
; CHECK-NEXT: umov w1, v0.b[8]
; CHECK-NEXT: umov w16, v0.b[9]
; CHECK-NEXT: mov z3.b, z0.b[18]
; CHECK-NEXT: mov z5.b, z0.b[19]
; CHECK-NEXT: and x22, x22, #0x1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: umov w17, v0.b[10]
; CHECK-NEXT: bfi x22, x11, #1, #1
; CHECK-NEXT: mov z6.b, z0.b[20]
; CHECK-NEXT: umov w3, v0.b[11]
; CHECK-NEXT: mov z4.b, z0.b[21]
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: mov z7.b, z0.b[22]
; CHECK-NEXT: bfi x22, x12, #2, #1
; CHECK-NEXT: fmov w19, s3
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: ubfiz x14, x14, #7, #1
; CHECK-NEXT: ubfiz x1, x1, #8, #1
; CHECK-NEXT: umov w4, v0.b[12]
; CHECK-NEXT: bfi x22, x13, #3, #1
; CHECK-NEXT: mov z16.b, z0.b[23]
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: ubfiz x16, x16, #9, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: umov w5, v0.b[13]
; CHECK-NEXT: mov z17.b, z0.b[24]
; CHECK-NEXT: fmov w23, s4
; CHECK-NEXT: orr x14, x14, x1
; CHECK-NEXT: bfi x22, x10, #4, #1
; CHECK-NEXT: ubfiz x10, x17, #10, #1
; CHECK-NEXT: mov z18.b, z0.b[25]
; CHECK-NEXT: fmov w24, s7
; CHECK-NEXT: ubfiz x13, x19, #18, #1
; CHECK-NEXT: ubfiz x19, x20, #19, #1
; CHECK-NEXT: orr x14, x14, x16
; CHECK-NEXT: ubfiz x16, x3, #11, #1
; CHECK-NEXT: umov w15, v0.b[14]
; CHECK-NEXT: mov z19.b, z0.b[26]
; CHECK-NEXT: fmov w25, s16
; CHECK-NEXT: ubfiz x1, x21, #20, #1
; CHECK-NEXT: orr x10, x14, x10
; CHECK-NEXT: bfi x22, x9, #5, #1
; CHECK-NEXT: mov z20.b, z0.b[27]
; CHECK-NEXT: fmov w26, s17
; CHECK-NEXT: orr x13, x13, x19
; CHECK-NEXT: ubfiz x9, x4, #12, #1
; CHECK-NEXT: orr x10, x10, x16
; CHECK-NEXT: ubfiz x16, x23, #21, #1
; CHECK-NEXT: umov w18, v0.b[15]
; CHECK-NEXT: mov z1.b, z0.b[16]
; CHECK-NEXT: mov z21.b, z0.b[28]
; CHECK-NEXT: fmov w11, s18
; CHECK-NEXT: orr x13, x13, x1
; CHECK-NEXT: ubfiz x14, x5, #13, #1
; CHECK-NEXT: bfi x22, x8, #6, #1
; CHECK-NEXT: ubfiz x8, x24, #22, #1
; CHECK-NEXT: mov z2.b, z0.b[17]
; CHECK-NEXT: mov z22.b, z0.b[29]
; CHECK-NEXT: fmov w27, s19
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x10, x13, x16
; CHECK-NEXT: ubfiz x13, x25, #23, #1
; CHECK-NEXT: mov z5.b, z0.b[30]
; CHECK-NEXT: fmov w28, s20
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: ubfiz x10, x15, #14, #1
; CHECK-NEXT: ubfiz x14, x26, #24, #1
; CHECK-NEXT: fmov w6, s1
; CHECK-NEXT: fmov w29, s21
; CHECK-NEXT: orr x8, x8, x13
; CHECK-NEXT: ubfiz x11, x11, #25, #1
; CHECK-NEXT: fmov w7, s2
; CHECK-NEXT: fmov w30, s22
; CHECK-NEXT: ubfiz x13, x18, #15, #1
; CHECK-NEXT: orr x9, x9, x10
; CHECK-NEXT: orr x8, x8, x14
; CHECK-NEXT: ubfiz x10, x27, #26, #1
; CHECK-NEXT: fmov w12, s5
; CHECK-NEXT: orr x8, x8, x11
; CHECK-NEXT: ubfiz x11, x28, #27, #1
; CHECK-NEXT: mov z3.b, z0.b[31]
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: ubfiz x10, x6, #16, #1
; CHECK-NEXT: ubfiz x13, x29, #28, #1
; CHECK-NEXT: orr x8, x8, x11
; CHECK-NEXT: ubfiz x11, x7, #17, #1
; CHECK-NEXT: ubfiz x14, x30, #29, #1
; CHECK-NEXT: mov z2.b, z0.b[32]
; CHECK-NEXT: orr x9, x9, x10
; CHECK-NEXT: orr x8, x8, x13
; CHECK-NEXT: ubfiz x10, x12, #30, #1
; CHECK-NEXT: fmov w12, s3
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: orr x8, x8, x14
; CHECK-NEXT: mov z1.b, z0.b[33]
; CHECK-NEXT: orr x9, x22, x9
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: lsl w10, w12, #31
; CHECK-NEXT: mov z2.b, z0.b[34]
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[62]
; CHECK-NEXT: mov z0.b, z0.b[63]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB38_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB38_3
; CHECK-NEXT: b .LBB38_4
; CHECK-NEXT: .LBB38_2:
; CHECK-NEXT: adrp x9, .LCPI38_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI38_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB38_4
; CHECK-NEXT: .LBB38_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB38_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB38_68
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB38_69
; CHECK-NEXT: .LBB38_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB38_70
; CHECK-NEXT: .LBB38_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB38_71
; CHECK-NEXT: .LBB38_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB38_72
; CHECK-NEXT: .LBB38_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB38_73
; CHECK-NEXT: .LBB38_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB38_74
; CHECK-NEXT: .LBB38_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB38_75
; CHECK-NEXT: .LBB38_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB38_76
; CHECK-NEXT: .LBB38_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB38_77
; CHECK-NEXT: .LBB38_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB38_78
; CHECK-NEXT: .LBB38_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB38_79
; CHECK-NEXT: .LBB38_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB38_80
; CHECK-NEXT: .LBB38_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB38_81
; CHECK-NEXT: .LBB38_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB38_82
; CHECK-NEXT: .LBB38_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB38_83
; CHECK-NEXT: .LBB38_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB38_84
; CHECK-NEXT: .LBB38_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB38_85
; CHECK-NEXT: .LBB38_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB38_86
; CHECK-NEXT: .LBB38_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB38_87
; CHECK-NEXT: .LBB38_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB38_88
; CHECK-NEXT: .LBB38_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB38_89
; CHECK-NEXT: .LBB38_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB38_90
; CHECK-NEXT: .LBB38_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB38_91
; CHECK-NEXT: .LBB38_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB38_92
; CHECK-NEXT: .LBB38_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB38_93
; CHECK-NEXT: .LBB38_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB38_94
; CHECK-NEXT: .LBB38_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB38_95
; CHECK-NEXT: .LBB38_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB38_96
; CHECK-NEXT: .LBB38_33: // %else118
; CHECK-NEXT: tbnz w8, #31, .LBB38_97
; CHECK-NEXT: .LBB38_34: // %else122
; CHECK-NEXT: tbnz x8, #32, .LBB38_98
; CHECK-NEXT: .LBB38_35: // %else126
; CHECK-NEXT: tbnz x8, #33, .LBB38_99
; CHECK-NEXT: .LBB38_36: // %else130
; CHECK-NEXT: tbnz x8, #34, .LBB38_100
; CHECK-NEXT: .LBB38_37: // %else134
; CHECK-NEXT: tbnz x8, #35, .LBB38_101
; CHECK-NEXT: .LBB38_38: // %else138
; CHECK-NEXT: tbnz x8, #36, .LBB38_102
; CHECK-NEXT: .LBB38_39: // %else142
; CHECK-NEXT: tbnz x8, #37, .LBB38_103
; CHECK-NEXT: .LBB38_40: // %else146
; CHECK-NEXT: tbnz x8, #38, .LBB38_104
; CHECK-NEXT: .LBB38_41: // %else150
; CHECK-NEXT: tbnz x8, #39, .LBB38_105
; CHECK-NEXT: .LBB38_42: // %else154
; CHECK-NEXT: tbnz x8, #40, .LBB38_106
; CHECK-NEXT: .LBB38_43: // %else158
; CHECK-NEXT: tbnz x8, #41, .LBB38_107
; CHECK-NEXT: .LBB38_44: // %else162
; CHECK-NEXT: tbnz x8, #42, .LBB38_108
; CHECK-NEXT: .LBB38_45: // %else166
; CHECK-NEXT: tbnz x8, #43, .LBB38_109
; CHECK-NEXT: .LBB38_46: // %else170
; CHECK-NEXT: tbnz x8, #44, .LBB38_110
; CHECK-NEXT: .LBB38_47: // %else174
; CHECK-NEXT: tbnz x8, #45, .LBB38_111
; CHECK-NEXT: .LBB38_48: // %else178
; CHECK-NEXT: tbnz x8, #46, .LBB38_112
; CHECK-NEXT: .LBB38_49: // %else182
; CHECK-NEXT: tbnz x8, #47, .LBB38_113
; CHECK-NEXT: .LBB38_50: // %else186
; CHECK-NEXT: tbnz x8, #48, .LBB38_114
; CHECK-NEXT: .LBB38_51: // %else190
; CHECK-NEXT: tbnz x8, #49, .LBB38_115
; CHECK-NEXT: .LBB38_52: // %else194
; CHECK-NEXT: tbnz x8, #50, .LBB38_116
; CHECK-NEXT: .LBB38_53: // %else198
; CHECK-NEXT: tbnz x8, #51, .LBB38_117
; CHECK-NEXT: .LBB38_54: // %else202
; CHECK-NEXT: tbnz x8, #52, .LBB38_118
; CHECK-NEXT: .LBB38_55: // %else206
; CHECK-NEXT: tbnz x8, #53, .LBB38_119
; CHECK-NEXT: .LBB38_56: // %else210
; CHECK-NEXT: tbnz x8, #54, .LBB38_120
; CHECK-NEXT: .LBB38_57: // %else214
; CHECK-NEXT: tbnz x8, #55, .LBB38_121
; CHECK-NEXT: .LBB38_58: // %else218
; CHECK-NEXT: tbnz x8, #56, .LBB38_122
; CHECK-NEXT: .LBB38_59: // %else222
; CHECK-NEXT: tbnz x8, #57, .LBB38_123
; CHECK-NEXT: .LBB38_60: // %else226
; CHECK-NEXT: tbnz x8, #58, .LBB38_124
; CHECK-NEXT: .LBB38_61: // %else230
; CHECK-NEXT: tbnz x8, #59, .LBB38_125
; CHECK-NEXT: .LBB38_62: // %else234
; CHECK-NEXT: tbnz x8, #60, .LBB38_126
; CHECK-NEXT: .LBB38_63: // %else238
; CHECK-NEXT: tbnz x8, #61, .LBB38_127
; CHECK-NEXT: .LBB38_64: // %else242
; CHECK-NEXT: tbnz x8, #62, .LBB38_128
; CHECK-NEXT: .LBB38_65: // %else246
; CHECK-NEXT: tbz x8, #63, .LBB38_67
; CHECK-NEXT: .LBB38_66: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB38_67: // %else250
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB38_68: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB38_6
; CHECK-NEXT: .LBB38_69: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB38_7
; CHECK-NEXT: .LBB38_70: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB38_8
; CHECK-NEXT: .LBB38_71: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB38_9
; CHECK-NEXT: .LBB38_72: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB38_10
; CHECK-NEXT: .LBB38_73: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB38_11
; CHECK-NEXT: .LBB38_74: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB38_12
; CHECK-NEXT: .LBB38_75: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB38_13
; CHECK-NEXT: .LBB38_76: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB38_14
; CHECK-NEXT: .LBB38_77: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB38_15
; CHECK-NEXT: .LBB38_78: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB38_16
; CHECK-NEXT: .LBB38_79: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB38_17
; CHECK-NEXT: .LBB38_80: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB38_18
; CHECK-NEXT: .LBB38_81: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB38_19
; CHECK-NEXT: .LBB38_82: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB38_20
; CHECK-NEXT: .LBB38_83: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB38_21
; CHECK-NEXT: .LBB38_84: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB38_22
; CHECK-NEXT: .LBB38_85: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB38_23
; CHECK-NEXT: .LBB38_86: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB38_24
; CHECK-NEXT: .LBB38_87: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB38_25
; CHECK-NEXT: .LBB38_88: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB38_26
; CHECK-NEXT: .LBB38_89: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB38_27
; CHECK-NEXT: .LBB38_90: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB38_28
; CHECK-NEXT: .LBB38_91: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB38_29
; CHECK-NEXT: .LBB38_92: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB38_30
; CHECK-NEXT: .LBB38_93: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB38_31
; CHECK-NEXT: .LBB38_94: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB38_32
; CHECK-NEXT: .LBB38_95: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB38_33
; CHECK-NEXT: .LBB38_96: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #31, .LBB38_34
; CHECK-NEXT: .LBB38_97: // %cond.load121
; CHECK-NEXT: mov w9, #31 // =0x1f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #32, .LBB38_35
; CHECK-NEXT: .LBB38_98: // %cond.load125
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #33, .LBB38_36
; CHECK-NEXT: .LBB38_99: // %cond.load129
; CHECK-NEXT: mov w9, #33 // =0x21
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #34, .LBB38_37
; CHECK-NEXT: .LBB38_100: // %cond.load133
; CHECK-NEXT: mov w9, #34 // =0x22
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #35, .LBB38_38
; CHECK-NEXT: .LBB38_101: // %cond.load137
; CHECK-NEXT: mov w9, #35 // =0x23
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #36, .LBB38_39
; CHECK-NEXT: .LBB38_102: // %cond.load141
; CHECK-NEXT: mov w9, #36 // =0x24
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #37, .LBB38_40
; CHECK-NEXT: .LBB38_103: // %cond.load145
; CHECK-NEXT: mov w9, #37 // =0x25
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #38, .LBB38_41
; CHECK-NEXT: .LBB38_104: // %cond.load149
; CHECK-NEXT: mov w9, #38 // =0x26
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #39, .LBB38_42
; CHECK-NEXT: .LBB38_105: // %cond.load153
; CHECK-NEXT: mov w9, #39 // =0x27
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #40, .LBB38_43
; CHECK-NEXT: .LBB38_106: // %cond.load157
; CHECK-NEXT: mov w9, #40 // =0x28
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #41, .LBB38_44
; CHECK-NEXT: .LBB38_107: // %cond.load161
; CHECK-NEXT: mov w9, #41 // =0x29
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #42, .LBB38_45
; CHECK-NEXT: .LBB38_108: // %cond.load165
; CHECK-NEXT: mov w9, #42 // =0x2a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #43, .LBB38_46
; CHECK-NEXT: .LBB38_109: // %cond.load169
; CHECK-NEXT: mov w9, #43 // =0x2b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #44, .LBB38_47
; CHECK-NEXT: .LBB38_110: // %cond.load173
; CHECK-NEXT: mov w9, #44 // =0x2c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #45, .LBB38_48
; CHECK-NEXT: .LBB38_111: // %cond.load177
; CHECK-NEXT: mov w9, #45 // =0x2d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #46, .LBB38_49
; CHECK-NEXT: .LBB38_112: // %cond.load181
; CHECK-NEXT: mov w9, #46 // =0x2e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #47, .LBB38_50
; CHECK-NEXT: .LBB38_113: // %cond.load185
; CHECK-NEXT: mov w9, #47 // =0x2f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #48, .LBB38_51
; CHECK-NEXT: .LBB38_114: // %cond.load189
; CHECK-NEXT: mov w9, #48 // =0x30
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #49, .LBB38_52
; CHECK-NEXT: .LBB38_115: // %cond.load193
; CHECK-NEXT: mov w9, #49 // =0x31
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #50, .LBB38_53
; CHECK-NEXT: .LBB38_116: // %cond.load197
; CHECK-NEXT: mov w9, #50 // =0x32
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #51, .LBB38_54
; CHECK-NEXT: .LBB38_117: // %cond.load201
; CHECK-NEXT: mov w9, #51 // =0x33
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #52, .LBB38_55
; CHECK-NEXT: .LBB38_118: // %cond.load205
; CHECK-NEXT: mov w9, #52 // =0x34
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #53, .LBB38_56
; CHECK-NEXT: .LBB38_119: // %cond.load209
; CHECK-NEXT: mov w9, #53 // =0x35
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #54, .LBB38_57
; CHECK-NEXT: .LBB38_120: // %cond.load213
; CHECK-NEXT: mov w9, #54 // =0x36
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #55, .LBB38_58
; CHECK-NEXT: .LBB38_121: // %cond.load217
; CHECK-NEXT: mov w9, #55 // =0x37
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #56, .LBB38_59
; CHECK-NEXT: .LBB38_122: // %cond.load221
; CHECK-NEXT: mov w9, #56 // =0x38
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #57, .LBB38_60
; CHECK-NEXT: .LBB38_123: // %cond.load225
; CHECK-NEXT: mov w9, #57 // =0x39
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #58, .LBB38_61
; CHECK-NEXT: .LBB38_124: // %cond.load229
; CHECK-NEXT: mov w9, #58 // =0x3a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #59, .LBB38_62
; CHECK-NEXT: .LBB38_125: // %cond.load233
; CHECK-NEXT: mov w9, #59 // =0x3b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #60, .LBB38_63
; CHECK-NEXT: .LBB38_126: // %cond.load237
; CHECK-NEXT: mov w9, #60 // =0x3c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #61, .LBB38_64
; CHECK-NEXT: .LBB38_127: // %cond.load241
; CHECK-NEXT: mov w9, #61 // =0x3d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #62, .LBB38_65
; CHECK-NEXT: .LBB38_128: // %cond.load245
; CHECK-NEXT: mov w9, #62 // =0x3e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz x8, #63, .LBB38_66
; CHECK-NEXT: b .LBB38_67
;
; CHECK-EXPAND-LABEL: masked_load_sext_v64i8i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl64
; CHECK-EXPAND-NEXT: ld1b { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1sb { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %b, zeroinitializer
%load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison)
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr %c
ret void
}
; Expand-load of <32 x i8> from %ap, sign-extended to <32 x i64> and stored to
; %c. A lane is active when the corresponding byte loaded from %bp is zero.
; vscale_range(16,0) sets a minimum vscale of 16 with no upper bound; the
; sve2p2 output below covers all 32 x 64-bit lanes with one "ptrue p0.d, vl32".
;
; Expected lowering, per RUN line prefix
;  - CHECK prefix (runs without sve2p2) - the expandload is scalarized. The
;    lane mask is collected bit by bit into w8, each bit is tested with
;    tbz/tbnz, and every active lane does an ldrb from x0 (post-incremented
;    for all but the last lane) followed by an index/cmpeq/predicated-mov lane
;    insert. Three sunpklo steps then widen the bytes to 64 bits for the st1d.
;  - CHECK-EXPAND prefix (run with -mattr=sve2p2) - cntp counts the active
;    lanes, whilelo builds a predicate of that length, ld1sb performs a
;    contiguous sign-extending load and expand scatters it to the active lanes.
;
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define void @masked_load_sext_v32i8i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w13, v0.b[1]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w4, v0.b[7]
; CHECK-NEXT: umov w5, v0.b[8]
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: umov w3, v0.b[9]
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: umov w1, v0.b[10]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: bfi w6, w13, #1, #1
; CHECK-NEXT: umov w18, v0.b[11]
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: ubfiz w13, w4, #7, #1
; CHECK-NEXT: ubfiz w4, w5, #8, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: bfi w6, w12, #2, #1
; CHECK-NEXT: umov w16, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: orr w12, w13, w4
; CHECK-NEXT: ubfiz w13, w3, #9, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w17, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: ubfiz w11, w1, #10, #1
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: ubfiz w3, w20, #18, #1
; CHECK-NEXT: ubfiz w4, w21, #19, #1
; CHECK-NEXT: orr w12, w12, w13
; CHECK-NEXT: ubfiz w13, w18, #11, #1
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: ubfiz w1, w22, #20, #1
; CHECK-NEXT: orr w11, w12, w11
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w3, w3, w4
; CHECK-NEXT: orr w11, w11, w13
; CHECK-NEXT: ubfiz w12, w16, #12, #1
; CHECK-NEXT: ubfiz w13, w23, #21, #1
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: orr w10, w3, w1
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: ubfiz w9, w17, #13, #1
; CHECK-NEXT: ubfiz w16, w24, #22, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w5, s21
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: ubfiz w12, w25, #23, #1
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: orr w9, w11, w9
; CHECK-NEXT: orr w10, w10, w16
; CHECK-NEXT: ubfiz w11, w26, #24, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: fmov w28, s22
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w14, #14, #1
; CHECK-NEXT: ubfiz w13, w27, #25, #1
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w29, s23
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: ubfiz w14, w5, #26, #1
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: fmov w30, s24
; CHECK-NEXT: ubfiz w11, w15, #15, #1
; CHECK-NEXT: bfi w6, w8, #6, #1
; CHECK-NEXT: orr w8, w9, w12
; CHECK-NEXT: orr w9, w10, w13
; CHECK-NEXT: orr w9, w9, w14
; CHECK-NEXT: ubfiz w10, w28, #27, #1
; CHECK-NEXT: fmov w14, s2
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: ubfiz w11, w7, #16, #1
; CHECK-NEXT: ubfiz w13, w29, #28, #1
; CHECK-NEXT: ubfiz w12, w19, #17, #1
; CHECK-NEXT: orr w9, w9, w10
; CHECK-NEXT: ubfiz w10, w30, #29, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w9, w9, w13
; CHECK-NEXT: ubfiz w11, w14, #30, #1
; CHECK-NEXT: orr w8, w8, w12
; CHECK-NEXT: orr w9, w9, w10
; CHECK-NEXT: orr w8, w6, w8
; CHECK-NEXT: orr w9, w9, w11
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB39_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB39_3
; CHECK-NEXT: b .LBB39_4
; CHECK-NEXT: .LBB39_2:
; CHECK-NEXT: adrp x9, .LCPI39_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI39_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB39_4
; CHECK-NEXT: .LBB39_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB39_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB39_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB39_37
; CHECK-NEXT: .LBB39_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB39_38
; CHECK-NEXT: .LBB39_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB39_39
; CHECK-NEXT: .LBB39_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB39_40
; CHECK-NEXT: .LBB39_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB39_41
; CHECK-NEXT: .LBB39_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB39_42
; CHECK-NEXT: .LBB39_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB39_43
; CHECK-NEXT: .LBB39_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB39_44
; CHECK-NEXT: .LBB39_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB39_45
; CHECK-NEXT: .LBB39_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB39_46
; CHECK-NEXT: .LBB39_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB39_47
; CHECK-NEXT: .LBB39_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB39_48
; CHECK-NEXT: .LBB39_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB39_49
; CHECK-NEXT: .LBB39_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB39_50
; CHECK-NEXT: .LBB39_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB39_51
; CHECK-NEXT: .LBB39_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB39_52
; CHECK-NEXT: .LBB39_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB39_53
; CHECK-NEXT: .LBB39_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB39_54
; CHECK-NEXT: .LBB39_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB39_55
; CHECK-NEXT: .LBB39_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB39_56
; CHECK-NEXT: .LBB39_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB39_57
; CHECK-NEXT: .LBB39_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB39_58
; CHECK-NEXT: .LBB39_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB39_59
; CHECK-NEXT: .LBB39_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB39_60
; CHECK-NEXT: .LBB39_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB39_61
; CHECK-NEXT: .LBB39_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB39_62
; CHECK-NEXT: .LBB39_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB39_63
; CHECK-NEXT: .LBB39_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB39_64
; CHECK-NEXT: .LBB39_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB39_35
; CHECK-NEXT: .LBB39_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB39_35: // %else122
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB39_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB39_6
; CHECK-NEXT: .LBB39_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB39_7
; CHECK-NEXT: .LBB39_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB39_8
; CHECK-NEXT: .LBB39_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB39_9
; CHECK-NEXT: .LBB39_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB39_10
; CHECK-NEXT: .LBB39_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB39_11
; CHECK-NEXT: .LBB39_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB39_12
; CHECK-NEXT: .LBB39_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB39_13
; CHECK-NEXT: .LBB39_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB39_14
; CHECK-NEXT: .LBB39_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB39_15
; CHECK-NEXT: .LBB39_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB39_16
; CHECK-NEXT: .LBB39_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB39_17
; CHECK-NEXT: .LBB39_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB39_18
; CHECK-NEXT: .LBB39_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB39_19
; CHECK-NEXT: .LBB39_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB39_20
; CHECK-NEXT: .LBB39_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB39_21
; CHECK-NEXT: .LBB39_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB39_22
; CHECK-NEXT: .LBB39_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB39_23
; CHECK-NEXT: .LBB39_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB39_24
; CHECK-NEXT: .LBB39_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB39_25
; CHECK-NEXT: .LBB39_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB39_26
; CHECK-NEXT: .LBB39_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB39_27
; CHECK-NEXT: .LBB39_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB39_28
; CHECK-NEXT: .LBB39_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB39_29
; CHECK-NEXT: .LBB39_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB39_30
; CHECK-NEXT: .LBB39_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB39_31
; CHECK-NEXT: .LBB39_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB39_32
; CHECK-NEXT: .LBB39_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB39_33
; CHECK-NEXT: .LBB39_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB39_34
; CHECK-NEXT: b .LBB39_35
;
; CHECK-EXPAND-LABEL: masked_load_sext_v32i8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1b { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1sb { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
; The mask source is loaded as data so the predicate is computed at run time.
%b = load <32 x i8>, ptr %bp
; A lane is active where the corresponding byte of %b is zero.
%mask = icmp eq <32 x i8> %b, zeroinitializer
; Expanding load from %ap under %mask; the passthru operand is poison.
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
; Sign-extend each i8 lane to i64 and store all 32 results to %c.
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
define void @masked_load_sext_v64i16i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v64i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: str x2, [sp] // 8-byte Spill
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w12, v0.b[1]
; CHECK-NEXT: fmov w25, s0
; CHECK-NEXT: mov z3.b, z0.b[18]
; CHECK-NEXT: mov z4.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[2]
; CHECK-NEXT: umov w14, v0.b[7]
; CHECK-NEXT: umov w3, v0.b[8]
; CHECK-NEXT: mov z5.b, z0.b[20]
; CHECK-NEXT: umov w4, v0.b[9]
; CHECK-NEXT: mov z6.b, z0.b[21]
; CHECK-NEXT: and x25, x25, #0x1
; CHECK-NEXT: umov w5, v0.b[10]
; CHECK-NEXT: mov z7.b, z0.b[22]
; CHECK-NEXT: fmov w19, s3
; CHECK-NEXT: fmov w20, s4
; CHECK-NEXT: bfi x25, x12, #1, #1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: mov z16.b, z0.b[23]
; CHECK-NEXT: fmov w21, s5
; CHECK-NEXT: umov w15, v0.b[11]
; CHECK-NEXT: fmov w22, s6
; CHECK-NEXT: bfi x25, x13, #2, #1
; CHECK-NEXT: ubfiz x13, x14, #7, #1
; CHECK-NEXT: ubfiz x14, x3, #8, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: umov w17, v0.b[12]
; CHECK-NEXT: mov z17.b, z0.b[24]
; CHECK-NEXT: fmov w23, s7
; CHECK-NEXT: ubfiz x3, x4, #9, #1
; CHECK-NEXT: ubfiz x4, x19, #18, #1
; CHECK-NEXT: ubfiz x19, x20, #19, #1
; CHECK-NEXT: umov w18, v0.b[13]
; CHECK-NEXT: mov z18.b, z0.b[25]
; CHECK-NEXT: fmov w24, s16
; CHECK-NEXT: orr x13, x13, x14
; CHECK-NEXT: ubfiz x14, x5, #10, #1
; CHECK-NEXT: ubfiz x5, x21, #20, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w16, v0.b[14]
; CHECK-NEXT: mov z19.b, z0.b[26]
; CHECK-NEXT: orr x4, x4, x19
; CHECK-NEXT: orr x13, x13, x3
; CHECK-NEXT: ubfiz x3, x22, #21, #1
; CHECK-NEXT: bfi x25, x11, #3, #1
; CHECK-NEXT: mov z20.b, z0.b[27]
; CHECK-NEXT: fmov w26, s17
; CHECK-NEXT: orr x11, x13, x14
; CHECK-NEXT: orr x13, x4, x5
; CHECK-NEXT: ubfiz x14, x15, #11, #1
; CHECK-NEXT: ubfiz x15, x23, #22, #1
; CHECK-NEXT: mov z1.b, z0.b[16]
; CHECK-NEXT: mov z21.b, z0.b[28]
; CHECK-NEXT: fmov w27, s18
; CHECK-NEXT: orr x13, x13, x3
; CHECK-NEXT: bfi x25, x10, #4, #1
; CHECK-NEXT: ubfiz x10, x17, #12, #1
; CHECK-NEXT: ubfiz x17, x24, #23, #1
; CHECK-NEXT: umov w1, v0.b[15]
; CHECK-NEXT: mov z2.b, z0.b[17]
; CHECK-NEXT: mov z4.b, z0.b[29]
; CHECK-NEXT: fmov w28, s19
; CHECK-NEXT: orr x11, x11, x14
; CHECK-NEXT: orr x13, x13, x15
; CHECK-NEXT: ubfiz x14, x18, #13, #1
; CHECK-NEXT: mov z5.b, z0.b[30]
; CHECK-NEXT: fmov w29, s20
; CHECK-NEXT: orr x10, x11, x10
; CHECK-NEXT: bfi x25, x9, #5, #1
; CHECK-NEXT: orr x9, x13, x17
; CHECK-NEXT: ubfiz x11, x16, #14, #1
; CHECK-NEXT: ubfiz x13, x26, #24, #1
; CHECK-NEXT: fmov w6, s1
; CHECK-NEXT: fmov w12, s21
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: ubfiz x15, x27, #25, #1
; CHECK-NEXT: umov w2, v0.b[6]
; CHECK-NEXT: fmov w7, s2
; CHECK-NEXT: fmov w30, s4
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x11, x28, #26, #1
; CHECK-NEXT: fmov w8, s5
; CHECK-NEXT: ubfiz x14, x1, #15, #1
; CHECK-NEXT: orr x9, x9, x15
; CHECK-NEXT: ubfiz x13, x29, #27, #1
; CHECK-NEXT: mov z3.b, z0.b[31]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x11, x6, #16, #1
; CHECK-NEXT: ubfiz x12, x12, #28, #1
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x13, x7, #17, #1
; CHECK-NEXT: ubfiz x14, x30, #29, #1
; CHECK-NEXT: mov z2.b, z0.b[32]
; CHECK-NEXT: bfi x25, x2, #6, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: ubfiz x8, x8, #30, #1
; CHECK-NEXT: fmov w11, s3
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: mov z1.b, z0.b[33]
; CHECK-NEXT: orr x10, x25, x10
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: lsl w9, w11, #31
; CHECK-NEXT: mov z2.b, z0.b[34]
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[62]
; CHECK-NEXT: mov z0.b, z0.b[63]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB40_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #2
; CHECK-NEXT: tbnz w8, #1, .LBB40_3
; CHECK-NEXT: b .LBB40_4
; CHECK-NEXT: .LBB40_2:
; CHECK-NEXT: adrp x9, .LCPI40_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI40_0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB40_4
; CHECK-NEXT: .LBB40_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: .LBB40_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB40_68
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB40_69
; CHECK-NEXT: .LBB40_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB40_70
; CHECK-NEXT: .LBB40_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB40_71
; CHECK-NEXT: .LBB40_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB40_72
; CHECK-NEXT: .LBB40_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB40_73
; CHECK-NEXT: .LBB40_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB40_74
; CHECK-NEXT: .LBB40_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB40_75
; CHECK-NEXT: .LBB40_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB40_76
; CHECK-NEXT: .LBB40_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB40_77
; CHECK-NEXT: .LBB40_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB40_78
; CHECK-NEXT: .LBB40_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB40_79
; CHECK-NEXT: .LBB40_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB40_80
; CHECK-NEXT: .LBB40_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB40_81
; CHECK-NEXT: .LBB40_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB40_82
; CHECK-NEXT: .LBB40_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB40_83
; CHECK-NEXT: .LBB40_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB40_84
; CHECK-NEXT: .LBB40_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB40_85
; CHECK-NEXT: .LBB40_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB40_86
; CHECK-NEXT: .LBB40_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB40_87
; CHECK-NEXT: .LBB40_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB40_88
; CHECK-NEXT: .LBB40_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB40_89
; CHECK-NEXT: .LBB40_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB40_90
; CHECK-NEXT: .LBB40_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB40_91
; CHECK-NEXT: .LBB40_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB40_92
; CHECK-NEXT: .LBB40_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB40_93
; CHECK-NEXT: .LBB40_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB40_94
; CHECK-NEXT: .LBB40_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB40_95
; CHECK-NEXT: .LBB40_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB40_96
; CHECK-NEXT: .LBB40_33: // %else118
; CHECK-NEXT: tbnz w8, #31, .LBB40_97
; CHECK-NEXT: .LBB40_34: // %else122
; CHECK-NEXT: tbnz x8, #32, .LBB40_98
; CHECK-NEXT: .LBB40_35: // %else126
; CHECK-NEXT: tbnz x8, #33, .LBB40_99
; CHECK-NEXT: .LBB40_36: // %else130
; CHECK-NEXT: tbnz x8, #34, .LBB40_100
; CHECK-NEXT: .LBB40_37: // %else134
; CHECK-NEXT: tbnz x8, #35, .LBB40_101
; CHECK-NEXT: .LBB40_38: // %else138
; CHECK-NEXT: tbnz x8, #36, .LBB40_102
; CHECK-NEXT: .LBB40_39: // %else142
; CHECK-NEXT: tbnz x8, #37, .LBB40_103
; CHECK-NEXT: .LBB40_40: // %else146
; CHECK-NEXT: tbnz x8, #38, .LBB40_104
; CHECK-NEXT: .LBB40_41: // %else150
; CHECK-NEXT: tbnz x8, #39, .LBB40_105
; CHECK-NEXT: .LBB40_42: // %else154
; CHECK-NEXT: tbnz x8, #40, .LBB40_106
; CHECK-NEXT: .LBB40_43: // %else158
; CHECK-NEXT: tbnz x8, #41, .LBB40_107
; CHECK-NEXT: .LBB40_44: // %else162
; CHECK-NEXT: tbnz x8, #42, .LBB40_108
; CHECK-NEXT: .LBB40_45: // %else166
; CHECK-NEXT: tbnz x8, #43, .LBB40_109
; CHECK-NEXT: .LBB40_46: // %else170
; CHECK-NEXT: tbnz x8, #44, .LBB40_110
; CHECK-NEXT: .LBB40_47: // %else174
; CHECK-NEXT: tbnz x8, #45, .LBB40_111
; CHECK-NEXT: .LBB40_48: // %else178
; CHECK-NEXT: tbnz x8, #46, .LBB40_112
; CHECK-NEXT: .LBB40_49: // %else182
; CHECK-NEXT: tbnz x8, #47, .LBB40_113
; CHECK-NEXT: .LBB40_50: // %else186
; CHECK-NEXT: tbnz x8, #48, .LBB40_114
; CHECK-NEXT: .LBB40_51: // %else190
; CHECK-NEXT: tbnz x8, #49, .LBB40_115
; CHECK-NEXT: .LBB40_52: // %else194
; CHECK-NEXT: tbnz x8, #50, .LBB40_116
; CHECK-NEXT: .LBB40_53: // %else198
; CHECK-NEXT: tbnz x8, #51, .LBB40_117
; CHECK-NEXT: .LBB40_54: // %else202
; CHECK-NEXT: tbnz x8, #52, .LBB40_118
; CHECK-NEXT: .LBB40_55: // %else206
; CHECK-NEXT: tbnz x8, #53, .LBB40_119
; CHECK-NEXT: .LBB40_56: // %else210
; CHECK-NEXT: tbnz x8, #54, .LBB40_120
; CHECK-NEXT: .LBB40_57: // %else214
; CHECK-NEXT: tbnz x8, #55, .LBB40_121
; CHECK-NEXT: .LBB40_58: // %else218
; CHECK-NEXT: tbnz x8, #56, .LBB40_122
; CHECK-NEXT: .LBB40_59: // %else222
; CHECK-NEXT: tbnz x8, #57, .LBB40_123
; CHECK-NEXT: .LBB40_60: // %else226
; CHECK-NEXT: tbnz x8, #58, .LBB40_124
; CHECK-NEXT: .LBB40_61: // %else230
; CHECK-NEXT: tbnz x8, #59, .LBB40_125
; CHECK-NEXT: .LBB40_62: // %else234
; CHECK-NEXT: tbnz x8, #60, .LBB40_126
; CHECK-NEXT: .LBB40_63: // %else238
; CHECK-NEXT: tbnz x8, #61, .LBB40_127
; CHECK-NEXT: .LBB40_64: // %else242
; CHECK-NEXT: tbnz x8, #62, .LBB40_128
; CHECK-NEXT: .LBB40_65: // %else246
; CHECK-NEXT: tbz x8, #63, .LBB40_67
; CHECK-NEXT: .LBB40_66: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: .LBB40_67: // %else250
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB40_68: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB40_6
; CHECK-NEXT: .LBB40_69: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB40_7
; CHECK-NEXT: .LBB40_70: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB40_8
; CHECK-NEXT: .LBB40_71: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB40_9
; CHECK-NEXT: .LBB40_72: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB40_10
; CHECK-NEXT: .LBB40_73: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB40_11
; CHECK-NEXT: .LBB40_74: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB40_12
; CHECK-NEXT: .LBB40_75: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB40_13
; CHECK-NEXT: .LBB40_76: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB40_14
; CHECK-NEXT: .LBB40_77: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB40_15
; CHECK-NEXT: .LBB40_78: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB40_16
; CHECK-NEXT: .LBB40_79: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB40_17
; CHECK-NEXT: .LBB40_80: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB40_18
; CHECK-NEXT: .LBB40_81: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB40_19
; CHECK-NEXT: .LBB40_82: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB40_20
; CHECK-NEXT: .LBB40_83: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB40_21
; CHECK-NEXT: .LBB40_84: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB40_22
; CHECK-NEXT: .LBB40_85: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB40_23
; CHECK-NEXT: .LBB40_86: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB40_24
; CHECK-NEXT: .LBB40_87: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB40_25
; CHECK-NEXT: .LBB40_88: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB40_26
; CHECK-NEXT: .LBB40_89: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB40_27
; CHECK-NEXT: .LBB40_90: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB40_28
; CHECK-NEXT: .LBB40_91: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB40_29
; CHECK-NEXT: .LBB40_92: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB40_30
; CHECK-NEXT: .LBB40_93: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB40_31
; CHECK-NEXT: .LBB40_94: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB40_32
; CHECK-NEXT: .LBB40_95: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB40_33
; CHECK-NEXT: .LBB40_96: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #31, .LBB40_34
; CHECK-NEXT: .LBB40_97: // %cond.load121
; CHECK-NEXT: mov w9, #31 // =0x1f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #32, .LBB40_35
; CHECK-NEXT: .LBB40_98: // %cond.load125
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #33, .LBB40_36
; CHECK-NEXT: .LBB40_99: // %cond.load129
; CHECK-NEXT: mov w9, #33 // =0x21
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #34, .LBB40_37
; CHECK-NEXT: .LBB40_100: // %cond.load133
; CHECK-NEXT: mov w9, #34 // =0x22
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #35, .LBB40_38
; CHECK-NEXT: .LBB40_101: // %cond.load137
; CHECK-NEXT: mov w9, #35 // =0x23
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #36, .LBB40_39
; CHECK-NEXT: .LBB40_102: // %cond.load141
; CHECK-NEXT: mov w9, #36 // =0x24
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #37, .LBB40_40
; CHECK-NEXT: .LBB40_103: // %cond.load145
; CHECK-NEXT: mov w9, #37 // =0x25
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #38, .LBB40_41
; CHECK-NEXT: .LBB40_104: // %cond.load149
; CHECK-NEXT: mov w9, #38 // =0x26
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #39, .LBB40_42
; CHECK-NEXT: .LBB40_105: // %cond.load153
; CHECK-NEXT: mov w9, #39 // =0x27
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #40, .LBB40_43
; CHECK-NEXT: .LBB40_106: // %cond.load157
; CHECK-NEXT: mov w9, #40 // =0x28
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #41, .LBB40_44
; CHECK-NEXT: .LBB40_107: // %cond.load161
; CHECK-NEXT: mov w9, #41 // =0x29
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #42, .LBB40_45
; CHECK-NEXT: .LBB40_108: // %cond.load165
; CHECK-NEXT: mov w9, #42 // =0x2a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #43, .LBB40_46
; CHECK-NEXT: .LBB40_109: // %cond.load169
; CHECK-NEXT: mov w9, #43 // =0x2b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #44, .LBB40_47
; CHECK-NEXT: .LBB40_110: // %cond.load173
; CHECK-NEXT: mov w9, #44 // =0x2c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #45, .LBB40_48
; CHECK-NEXT: .LBB40_111: // %cond.load177
; CHECK-NEXT: mov w9, #45 // =0x2d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #46, .LBB40_49
; CHECK-NEXT: .LBB40_112: // %cond.load181
; CHECK-NEXT: mov w9, #46 // =0x2e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #47, .LBB40_50
; CHECK-NEXT: .LBB40_113: // %cond.load185
; CHECK-NEXT: mov w9, #47 // =0x2f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #48, .LBB40_51
; CHECK-NEXT: .LBB40_114: // %cond.load189
; CHECK-NEXT: mov w9, #48 // =0x30
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #49, .LBB40_52
; CHECK-NEXT: .LBB40_115: // %cond.load193
; CHECK-NEXT: mov w9, #49 // =0x31
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #50, .LBB40_53
; CHECK-NEXT: .LBB40_116: // %cond.load197
; CHECK-NEXT: mov w9, #50 // =0x32
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #51, .LBB40_54
; CHECK-NEXT: .LBB40_117: // %cond.load201
; CHECK-NEXT: mov w9, #51 // =0x33
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #52, .LBB40_55
; CHECK-NEXT: .LBB40_118: // %cond.load205
; CHECK-NEXT: mov w9, #52 // =0x34
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #53, .LBB40_56
; CHECK-NEXT: .LBB40_119: // %cond.load209
; CHECK-NEXT: mov w9, #53 // =0x35
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #54, .LBB40_57
; CHECK-NEXT: .LBB40_120: // %cond.load213
; CHECK-NEXT: mov w9, #54 // =0x36
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #55, .LBB40_58
; CHECK-NEXT: .LBB40_121: // %cond.load217
; CHECK-NEXT: mov w9, #55 // =0x37
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #56, .LBB40_59
; CHECK-NEXT: .LBB40_122: // %cond.load221
; CHECK-NEXT: mov w9, #56 // =0x38
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #57, .LBB40_60
; CHECK-NEXT: .LBB40_123: // %cond.load225
; CHECK-NEXT: mov w9, #57 // =0x39
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #58, .LBB40_61
; CHECK-NEXT: .LBB40_124: // %cond.load229
; CHECK-NEXT: mov w9, #58 // =0x3a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #59, .LBB40_62
; CHECK-NEXT: .LBB40_125: // %cond.load233
; CHECK-NEXT: mov w9, #59 // =0x3b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #60, .LBB40_63
; CHECK-NEXT: .LBB40_126: // %cond.load237
; CHECK-NEXT: mov w9, #60 // =0x3c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #61, .LBB40_64
; CHECK-NEXT: .LBB40_127: // %cond.load241
; CHECK-NEXT: mov w9, #61 // =0x3d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #62, .LBB40_65
; CHECK-NEXT: .LBB40_128: // %cond.load245
; CHECK-NEXT: mov w9, #62 // =0x3e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbnz x8, #63, .LBB40_66
; CHECK-NEXT: b .LBB40_67
;
; CHECK-EXPAND-LABEL: masked_load_sext_v64i16i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl64
; CHECK-EXPAND-NEXT: ld1h { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1sh { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <64 x i16>, ptr %bp
%mask = icmp eq <64 x i16> %b, zeroinitializer
%load = call <64 x i16> @llvm.masked.expandload.v64i16(ptr %ap, <64 x i1> %mask, <64 x i16> poison)
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, ptr %c
ret void
}
define void @masked_load_sext_v32i16i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.h, vl32
; CHECK-NEXT: str x2, [sp] // 8-byte Spill
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w12, v0.b[1]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w3, v0.b[7]
; CHECK-NEXT: umov w5, v0.b[8]
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[2]
; CHECK-NEXT: umov w4, v0.b[9]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: umov w1, v0.b[10]
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: bfi w6, w12, #1, #1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w16, v0.b[11]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: ubfiz w12, w3, #7, #1
; CHECK-NEXT: ubfiz w3, w5, #8, #1
; CHECK-NEXT: umov w17, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: bfi w6, w13, #2, #1
; CHECK-NEXT: ubfiz w13, w4, #9, #1
; CHECK-NEXT: umov w18, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: ubfiz w5, w20, #18, #1
; CHECK-NEXT: ubfiz w20, w21, #19, #1
; CHECK-NEXT: orr w12, w12, w3
; CHECK-NEXT: ubfiz w1, w1, #10, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: ubfiz w4, w22, #20, #1
; CHECK-NEXT: orr w12, w12, w13
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: orr w3, w5, w20
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: orr w11, w12, w1
; CHECK-NEXT: ubfiz w12, w16, #11, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w13, w3, w4
; CHECK-NEXT: ubfiz w3, w23, #21, #1
; CHECK-NEXT: ubfiz w16, w17, #12, #1
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: ubfiz w17, w24, #22, #1
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: ubfiz w12, w18, #13, #1
; CHECK-NEXT: fmov w28, s21
; CHECK-NEXT: orr w13, w13, w3
; CHECK-NEXT: ubfiz w18, w25, #23, #1
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: orr w10, w11, w16
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w29, s22
; CHECK-NEXT: orr w11, w13, w17
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w26, #24, #1
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: orr w11, w11, w18
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: ubfiz w9, w14, #14, #1
; CHECK-NEXT: ubfiz w13, w27, #25, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: ubfiz w14, w28, #26, #1
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w30, s23
; CHECK-NEXT: orr w9, w10, w9
; CHECK-NEXT: orr w10, w11, w13
; CHECK-NEXT: ubfiz w11, w29, #27, #1
; CHECK-NEXT: umov w2, v0.b[6]
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: fmov w8, s24
; CHECK-NEXT: ubfiz w12, w15, #15, #1
; CHECK-NEXT: orr w10, w10, w14
; CHECK-NEXT: ubfiz w14, w30, #28, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: fmov w11, s2
; CHECK-NEXT: orr w9, w9, w12
; CHECK-NEXT: ubfiz w12, w7, #16, #1
; CHECK-NEXT: ubfiz w13, w19, #17, #1
; CHECK-NEXT: ubfiz w8, w8, #29, #1
; CHECK-NEXT: bfi w6, w2, #6, #1
; CHECK-NEXT: orr w10, w10, w14
; CHECK-NEXT: orr w9, w9, w12
; CHECK-NEXT: ubfiz w11, w11, #30, #1
; CHECK-NEXT: orr w8, w10, w8
; CHECK-NEXT: orr w9, w9, w13
; CHECK-NEXT: orr w9, w6, w9
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB41_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #2
; CHECK-NEXT: tbnz w8, #1, .LBB41_3
; CHECK-NEXT: b .LBB41_4
; CHECK-NEXT: .LBB41_2:
; CHECK-NEXT: adrp x9, .LCPI41_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI41_0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB41_4
; CHECK-NEXT: .LBB41_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: .LBB41_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB41_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB41_37
; CHECK-NEXT: .LBB41_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB41_38
; CHECK-NEXT: .LBB41_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB41_39
; CHECK-NEXT: .LBB41_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB41_40
; CHECK-NEXT: .LBB41_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB41_41
; CHECK-NEXT: .LBB41_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB41_42
; CHECK-NEXT: .LBB41_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB41_43
; CHECK-NEXT: .LBB41_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB41_44
; CHECK-NEXT: .LBB41_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB41_45
; CHECK-NEXT: .LBB41_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB41_46
; CHECK-NEXT: .LBB41_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB41_47
; CHECK-NEXT: .LBB41_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB41_48
; CHECK-NEXT: .LBB41_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB41_49
; CHECK-NEXT: .LBB41_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB41_50
; CHECK-NEXT: .LBB41_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB41_51
; CHECK-NEXT: .LBB41_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB41_52
; CHECK-NEXT: .LBB41_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB41_53
; CHECK-NEXT: .LBB41_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB41_54
; CHECK-NEXT: .LBB41_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB41_55
; CHECK-NEXT: .LBB41_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB41_56
; CHECK-NEXT: .LBB41_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB41_57
; CHECK-NEXT: .LBB41_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB41_58
; CHECK-NEXT: .LBB41_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB41_59
; CHECK-NEXT: .LBB41_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB41_60
; CHECK-NEXT: .LBB41_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB41_61
; CHECK-NEXT: .LBB41_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB41_62
; CHECK-NEXT: .LBB41_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB41_63
; CHECK-NEXT: .LBB41_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB41_64
; CHECK-NEXT: .LBB41_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB41_35
; CHECK-NEXT: .LBB41_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: .LBB41_35: // %else122
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB41_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB41_6
; CHECK-NEXT: .LBB41_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB41_7
; CHECK-NEXT: .LBB41_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB41_8
; CHECK-NEXT: .LBB41_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB41_9
; CHECK-NEXT: .LBB41_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB41_10
; CHECK-NEXT: .LBB41_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB41_11
; CHECK-NEXT: .LBB41_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB41_12
; CHECK-NEXT: .LBB41_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB41_13
; CHECK-NEXT: .LBB41_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB41_14
; CHECK-NEXT: .LBB41_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB41_15
; CHECK-NEXT: .LBB41_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB41_16
; CHECK-NEXT: .LBB41_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB41_17
; CHECK-NEXT: .LBB41_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB41_18
; CHECK-NEXT: .LBB41_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB41_19
; CHECK-NEXT: .LBB41_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB41_20
; CHECK-NEXT: .LBB41_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB41_21
; CHECK-NEXT: .LBB41_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB41_22
; CHECK-NEXT: .LBB41_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB41_23
; CHECK-NEXT: .LBB41_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB41_24
; CHECK-NEXT: .LBB41_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB41_25
; CHECK-NEXT: .LBB41_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB41_26
; CHECK-NEXT: .LBB41_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB41_27
; CHECK-NEXT: .LBB41_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB41_28
; CHECK-NEXT: .LBB41_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB41_29
; CHECK-NEXT: .LBB41_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB41_30
; CHECK-NEXT: .LBB41_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB41_31
; CHECK-NEXT: .LBB41_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB41_32
; CHECK-NEXT: .LBB41_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB41_33
; CHECK-NEXT: .LBB41_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB41_34
; CHECK-NEXT: b .LBB41_35
;
; CHECK-EXPAND-LABEL: masked_load_sext_v32i16i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1h { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1sh { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
%load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison)
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
; Expand-load of <32 x i32> whose result is sign-extended to <32 x i64>.
; The mask is built by comparing each lane of the vector loaded from %bp with
; zero; the expand-load reads from %ap with a poison passthrough, and the
; sign-extended result is stored to %c.
; The default-prefix assertions below expect the scalarized form: the lane mask
; is packed into w8 and each bit is tested with tbz/tbnz around a per-lane
; load. The -mattr=sve2p2 assertions expect the cntp/whilelo/ld1sw/expand
; sequence instead.
define void @masked_load_sext_v32i32i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_sext_v32i32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[1]
; CHECK-NEXT: umov w3, v0.b[7]
; CHECK-NEXT: umov w4, v0.b[8]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w5, v0.b[9]
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: umov w18, v0.b[10]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w1, v0.b[11]
; CHECK-NEXT: bfi w6, w13, #1, #1
; CHECK-NEXT: ubfiz w13, w3, #7, #1
; CHECK-NEXT: ubfiz w3, w4, #8, #1
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: ubfiz w4, w5, #9, #1
; CHECK-NEXT: ubfiz w5, w20, #18, #1
; CHECK-NEXT: ubfiz w20, w21, #19, #1
; CHECK-NEXT: umov w16, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: bfi w6, w12, #2, #1
; CHECK-NEXT: orr w12, w13, w3
; CHECK-NEXT: ubfiz w13, w22, #20, #1
; CHECK-NEXT: umov w17, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: orr w3, w5, w20
; CHECK-NEXT: ubfiz w18, w18, #10, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: orr w12, w12, w4
; CHECK-NEXT: orr w13, w3, w13
; CHECK-NEXT: ubfiz w3, w23, #21, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: ubfiz w1, w1, #11, #1
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: orr w11, w12, w18
; CHECK-NEXT: ubfiz w12, w24, #22, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w13, w13, w3
; CHECK-NEXT: ubfiz w16, w16, #12, #1
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: orr w11, w11, w1
; CHECK-NEXT: orr w12, w13, w12
; CHECK-NEXT: ubfiz w13, w17, #13, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: fmov w28, s21
; CHECK-NEXT: ubfiz w17, w25, #23, #1
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: orr w10, w11, w16
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w29, s22
; CHECK-NEXT: ubfiz w11, w26, #24, #1
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: ubfiz w13, w14, #14, #1
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: orr w12, w12, w17
; CHECK-NEXT: ubfiz w14, w27, #25, #1
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: orr w11, w12, w11
; CHECK-NEXT: ubfiz w9, w28, #26, #1
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: fmov w13, s24
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w30, s23
; CHECK-NEXT: orr w11, w11, w14
; CHECK-NEXT: bfi w6, w8, #6, #1
; CHECK-NEXT: ubfiz w8, w29, #27, #1
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: orr w9, w11, w9
; CHECK-NEXT: ubfiz w12, w15, #15, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: ubfiz w9, w13, #29, #1
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: ubfiz w11, w7, #16, #1
; CHECK-NEXT: ubfiz w14, w30, #28, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w19, #17, #1
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: orr w8, w8, w14
; CHECK-NEXT: ubfiz w11, w13, #30, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: orr w9, w6, w10
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB42_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #4
; CHECK-NEXT: tbnz w8, #1, .LBB42_3
; CHECK-NEXT: b .LBB42_4
; CHECK-NEXT: .LBB42_2:
; CHECK-NEXT: adrp x9, .LCPI42_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI42_0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB42_4
; CHECK-NEXT: .LBB42_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: .LBB42_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB42_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB42_37
; CHECK-NEXT: .LBB42_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB42_38
; CHECK-NEXT: .LBB42_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB42_39
; CHECK-NEXT: .LBB42_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB42_40
; CHECK-NEXT: .LBB42_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB42_41
; CHECK-NEXT: .LBB42_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB42_42
; CHECK-NEXT: .LBB42_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB42_43
; CHECK-NEXT: .LBB42_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB42_44
; CHECK-NEXT: .LBB42_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB42_45
; CHECK-NEXT: .LBB42_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB42_46
; CHECK-NEXT: .LBB42_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB42_47
; CHECK-NEXT: .LBB42_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB42_48
; CHECK-NEXT: .LBB42_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB42_49
; CHECK-NEXT: .LBB42_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB42_50
; CHECK-NEXT: .LBB42_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB42_51
; CHECK-NEXT: .LBB42_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB42_52
; CHECK-NEXT: .LBB42_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB42_53
; CHECK-NEXT: .LBB42_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB42_54
; CHECK-NEXT: .LBB42_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB42_55
; CHECK-NEXT: .LBB42_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB42_56
; CHECK-NEXT: .LBB42_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB42_57
; CHECK-NEXT: .LBB42_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB42_58
; CHECK-NEXT: .LBB42_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB42_59
; CHECK-NEXT: .LBB42_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB42_60
; CHECK-NEXT: .LBB42_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB42_61
; CHECK-NEXT: .LBB42_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB42_62
; CHECK-NEXT: .LBB42_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB42_63
; CHECK-NEXT: .LBB42_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB42_64
; CHECK-NEXT: .LBB42_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB42_35
; CHECK-NEXT: .LBB42_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w8
; CHECK-NEXT: .LBB42_35: // %else122
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB42_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB42_6
; CHECK-NEXT: .LBB42_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB42_7
; CHECK-NEXT: .LBB42_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB42_8
; CHECK-NEXT: .LBB42_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB42_9
; CHECK-NEXT: .LBB42_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB42_10
; CHECK-NEXT: .LBB42_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB42_11
; CHECK-NEXT: .LBB42_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB42_12
; CHECK-NEXT: .LBB42_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB42_13
; CHECK-NEXT: .LBB42_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB42_14
; CHECK-NEXT: .LBB42_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB42_15
; CHECK-NEXT: .LBB42_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB42_16
; CHECK-NEXT: .LBB42_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB42_17
; CHECK-NEXT: .LBB42_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB42_18
; CHECK-NEXT: .LBB42_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB42_19
; CHECK-NEXT: .LBB42_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB42_20
; CHECK-NEXT: .LBB42_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB42_21
; CHECK-NEXT: .LBB42_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB42_22
; CHECK-NEXT: .LBB42_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB42_23
; CHECK-NEXT: .LBB42_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB42_24
; CHECK-NEXT: .LBB42_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB42_25
; CHECK-NEXT: .LBB42_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB42_26
; CHECK-NEXT: .LBB42_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB42_27
; CHECK-NEXT: .LBB42_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB42_28
; CHECK-NEXT: .LBB42_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB42_29
; CHECK-NEXT: .LBB42_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB42_30
; CHECK-NEXT: .LBB42_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB42_31
; CHECK-NEXT: .LBB42_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB42_32
; CHECK-NEXT: .LBB42_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB42_33
; CHECK-NEXT: .LBB42_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB42_34
; CHECK-NEXT: b .LBB42_35
;
; CHECK-EXPAND-LABEL: masked_load_sext_v32i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1sw { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
; IR under test; the assertions above are autogenerated from it.
; Mask lane i is true when lane i of the vector loaded from %bp equals zero.
%b = load <32 x i32>, ptr %bp
%mask = icmp eq <32 x i32> %b, zeroinitializer
; Expand-load from %ap under %mask; the passthrough operand is poison.
%load = call <32 x i32> @llvm.masked.expandload.v32i32(ptr %ap, <32 x i1> %mask, <32 x i32> poison)
; Widen each i32 lane to i64 with sign extension, then store the result to %c.
%ext = sext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v128i8i16(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v128i8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w9, v1.b[1]
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov z0.b, z1.b[18]
; CHECK-NEXT: umov w10, v1.b[7]
; CHECK-NEXT: umov w11, v1.b[8]
; CHECK-NEXT: mov z2.b, z1.b[19]
; CHECK-NEXT: umov w12, v1.b[2]
; CHECK-NEXT: umov w14, v1.b[9]
; CHECK-NEXT: umov w13, v1.b[3]
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: fmov w16, s0
; CHECK-NEXT: mov z0.b, z1.b[20]
; CHECK-NEXT: bfi x8, x9, #1, #1
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: umov w15, v1.b[10]
; CHECK-NEXT: ubfiz x10, x10, #7, #1
; CHECK-NEXT: ubfiz x11, x11, #8, #1
; CHECK-NEXT: mov z2.b, z1.b[21]
; CHECK-NEXT: bfi x8, x12, #2, #1
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: ubfiz x16, x16, #18, #1
; CHECK-NEXT: ubfiz x9, x9, #19, #1
; CHECK-NEXT: ubfiz x14, x14, #9, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: umov w11, v1.b[11]
; CHECK-NEXT: mov z0.b, z1.b[22]
; CHECK-NEXT: ubfiz x15, x15, #10, #1
; CHECK-NEXT: ubfiz x12, x12, #20, #1
; CHECK-NEXT: orr x9, x16, x9
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: fmov w14, s2
; CHECK-NEXT: bfi x8, x13, #3, #1
; CHECK-NEXT: orr x10, x10, x15
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: umov w12, v1.b[12]
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: ubfiz x11, x11, #11, #1
; CHECK-NEXT: umov w15, v1.b[13]
; CHECK-NEXT: mov z0.b, z1.b[16]
; CHECK-NEXT: ubfiz x14, x14, #21, #1
; CHECK-NEXT: mov z2.b, z1.b[17]
; CHECK-NEXT: umov w16, v1.b[4]
; CHECK-NEXT: ubfiz x13, x13, #22, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: umov w11, v1.b[14]
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: ubfiz x12, x12, #12, #1
; CHECK-NEXT: umov w14, v1.b[5]
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: umov w13, v1.b[15]
; CHECK-NEXT: ubfiz x15, x15, #13, #1
; CHECK-NEXT: orr x10, x10, x12
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: mov z0.b, z1.b[23]
; CHECK-NEXT: ubfiz x11, x11, #14, #1
; CHECK-NEXT: orr x10, x10, x15
; CHECK-NEXT: fmov w15, s2
; CHECK-NEXT: mov z2.b, z1.b[24]
; CHECK-NEXT: bfi x8, x16, #4, #1
; CHECK-NEXT: umov w16, v1.b[6]
; CHECK-NEXT: ubfiz x13, x13, #15, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: fmov w11, s0
; CHECK-NEXT: mov z0.b, z1.b[25]
; CHECK-NEXT: ubfiz x12, x12, #16, #1
; CHECK-NEXT: bfi x8, x14, #5, #1
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: mov z2.b, z1.b[26]
; CHECK-NEXT: ubfiz x11, x11, #23, #1
; CHECK-NEXT: orr x10, x10, x12
; CHECK-NEXT: ubfiz x14, x15, #17, #1
; CHECK-NEXT: fmov w12, s0
; CHECK-NEXT: mov z0.b, z1.b[27]
; CHECK-NEXT: bfi x8, x16, #6, #1
; CHECK-NEXT: ubfiz x13, x13, #24, #1
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: fmov w11, s2
; CHECK-NEXT: mov z2.b, z1.b[28]
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x12, x12, #25, #1
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: mov z0.b, z1.b[29]
; CHECK-NEXT: ubfiz x11, x11, #26, #1
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: fmov w12, s2
; CHECK-NEXT: mov z2.b, z1.b[30]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x11, x13, #27, #1
; CHECK-NEXT: fmov w13, s0
; CHECK-NEXT: mov z0.b, z1.b[31]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x12, x12, #28, #1
; CHECK-NEXT: ubfiz x11, x13, #29, #1
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: mov z2.b, z1.b[32]
; CHECK-NEXT: fmov w10, s0
; CHECK-NEXT: mov z0.b, z1.b[33]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x12, x13, #30, #1
; CHECK-NEXT: lsl w10, w10, #31
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[34]
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: mov z0.b, z1.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s0
; CHECK-NEXT: mov z0.b, z1.b[63]
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z1.b[62]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB43_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB43_3
; CHECK-NEXT: b .LBB43_4
; CHECK-NEXT: .LBB43_2:
; CHECK-NEXT: adrp x9, .LCPI43_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI43_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB43_4
; CHECK-NEXT: .LBB43_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB43_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB43_181
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB43_182
; CHECK-NEXT: .LBB43_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB43_183
; CHECK-NEXT: .LBB43_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB43_184
; CHECK-NEXT: .LBB43_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB43_185
; CHECK-NEXT: .LBB43_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB43_186
; CHECK-NEXT: .LBB43_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB43_187
; CHECK-NEXT: .LBB43_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB43_188
; CHECK-NEXT: .LBB43_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB43_189
; CHECK-NEXT: .LBB43_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB43_190
; CHECK-NEXT: .LBB43_14: // %else42
; CHECK-NEXT: tbz w8, #12, .LBB43_16
; CHECK-NEXT: .LBB43_15: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB43_16: // %else46
; CHECK-NEXT: mov w12, #71 // =0x47
; CHECK-NEXT: mov w14, #72 // =0x48
; CHECK-NEXT: mov w9, #83 // =0x53
; CHECK-NEXT: mov w10, #84 // =0x54
; CHECK-NEXT: tbz w8, #13, .LBB43_18
; CHECK-NEXT: // %bb.17: // %cond.load49
; CHECK-NEXT: mov w11, #13 // =0xd
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w11
; CHECK-NEXT: .LBB43_18: // %else50
; CHECK-NEXT: mov w11, #73 // =0x49
; CHECK-NEXT: mov w13, #85 // =0x55
; CHECK-NEXT: tbz w8, #14, .LBB43_20
; CHECK-NEXT: // %bb.19: // %cond.load53
; CHECK-NEXT: mov w15, #14 // =0xe
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w15
; CHECK-NEXT: ldrb w15, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w15
; CHECK-NEXT: .LBB43_20: // %else54
; CHECK-NEXT: whilels p3.b, xzr, x12
; CHECK-NEXT: whilels p4.b, xzr, x14
; CHECK-NEXT: mov w14, #86 // =0x56
; CHECK-NEXT: whilels p1.b, xzr, x9
; CHECK-NEXT: mov w9, #74 // =0x4a
; CHECK-NEXT: whilels p2.b, xzr, x10
; CHECK-NEXT: tbz w8, #15, .LBB43_22
; CHECK-NEXT: // %bb.21: // %cond.load57
; CHECK-NEXT: mov w10, #15 // =0xf
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p5/m, w10
; CHECK-NEXT: .LBB43_22: // %else58
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: mov w1, #75 // =0x4b
; CHECK-NEXT: mov w17, #87 // =0x57
; CHECK-NEXT: lastb w12, p4, z1.b
; CHECK-NEXT: lastb w15, p1, z1.b
; CHECK-NEXT: lastb w16, p2, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x11
; CHECK-NEXT: whilels p1.b, xzr, x13
; CHECK-NEXT: tbz w8, #16, .LBB43_24
; CHECK-NEXT: // %bb.23: // %cond.load61
; CHECK-NEXT: mov w11, #16 // =0x10
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w11
; CHECK-NEXT: .LBB43_24: // %else62
; CHECK-NEXT: lastb w11, p2, z1.b
; CHECK-NEXT: mov w3, #76 // =0x4c
; CHECK-NEXT: mov w18, #88 // =0x58
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x9
; CHECK-NEXT: whilels p1.b, xzr, x14
; CHECK-NEXT: tbz w8, #17, .LBB43_26
; CHECK-NEXT: // %bb.25: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w9
; CHECK-NEXT: .LBB43_26: // %else66
; CHECK-NEXT: lastb w9, p2, z1.b
; CHECK-NEXT: ubfiz x5, x10, #7, #1
; CHECK-NEXT: ubfiz x7, x12, #8, #1
; CHECK-NEXT: ubfiz x4, x15, #19, #1
; CHECK-NEXT: ubfiz x6, x16, #20, #1
; CHECK-NEXT: mov w15, #89 // =0x59
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x1
; CHECK-NEXT: mov w1, #77 // =0x4d
; CHECK-NEXT: whilels p1.b, xzr, x17
; CHECK-NEXT: mov w17, #64 // =0x40
; CHECK-NEXT: tbz w8, #18, .LBB43_28
; CHECK-NEXT: // %bb.27: // %cond.load69
; CHECK-NEXT: mov w10, #18 // =0x12
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w10
; CHECK-NEXT: .LBB43_28: // %else70
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: lastb w10, p2, z1.b
; CHECK-NEXT: orr x7, x5, x7
; CHECK-NEXT: ubfiz x5, x13, #21, #1
; CHECK-NEXT: mov w16, #65 // =0x41
; CHECK-NEXT: orr x19, x4, x6
; CHECK-NEXT: mov w4, #90 // =0x5a
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: whilels p2.b, xzr, x3
; CHECK-NEXT: ubfiz x3, x11, #9, #1
; CHECK-NEXT: whilels p1.b, xzr, x18
; CHECK-NEXT: mov w18, #78 // =0x4e
; CHECK-NEXT: tbz w8, #19, .LBB43_30
; CHECK-NEXT: // %bb.29: // %cond.load73
; CHECK-NEXT: mov w11, #19 // =0x13
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p3.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p3/m, w11
; CHECK-NEXT: .LBB43_30: // %else74
; CHECK-NEXT: lastb w11, p2, z1.b
; CHECK-NEXT: ubfiz x21, x9, #10, #1
; CHECK-NEXT: ubfiz x6, x14, #22, #1
; CHECK-NEXT: orr x7, x7, x3
; CHECK-NEXT: mov w3, #79 // =0x4f
; CHECK-NEXT: orr x20, x19, x5
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: mov w5, #91 // =0x5b
; CHECK-NEXT: whilels p3.b, xzr, x17
; CHECK-NEXT: mov w17, #66 // =0x42
; CHECK-NEXT: whilels p2.b, xzr, x1
; CHECK-NEXT: whilels p1.b, xzr, x15
; CHECK-NEXT: tbz w8, #20, .LBB43_32
; CHECK-NEXT: // %bb.31: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w9
; CHECK-NEXT: .LBB43_32: // %else78
; CHECK-NEXT: lastb w9, p3, z1.b
; CHECK-NEXT: orr x19, x7, x21
; CHECK-NEXT: ubfiz x21, x10, #11, #1
; CHECK-NEXT: ubfiz x7, x12, #23, #1
; CHECK-NEXT: mov w1, #67 // =0x43
; CHECK-NEXT: orr x22, x20, x6
; CHECK-NEXT: lastb w14, p2, z1.b
; CHECK-NEXT: mov w6, #92 // =0x5c
; CHECK-NEXT: lastb w15, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x16
; CHECK-NEXT: whilels p2.b, xzr, x18
; CHECK-NEXT: whilels p1.b, xzr, x4
; CHECK-NEXT: mov w4, #80 // =0x50
; CHECK-NEXT: tbz w8, #21, .LBB43_34
; CHECK-NEXT: // %bb.33: // %cond.load81
; CHECK-NEXT: mov w10, #21 // =0x15
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w10
; CHECK-NEXT: .LBB43_34: // %else82
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: orr x20, x19, x21
; CHECK-NEXT: ubfiz x21, x11, #12, #1
; CHECK-NEXT: ubfiz x19, x13, #24, #1
; CHECK-NEXT: mov w18, #68 // =0x44
; CHECK-NEXT: orr x23, x22, x7
; CHECK-NEXT: lastb w12, p2, z1.b
; CHECK-NEXT: mov w7, #93 // =0x5d
; CHECK-NEXT: lastb w16, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x17
; CHECK-NEXT: whilels p2.b, xzr, x3
; CHECK-NEXT: whilels p1.b, xzr, x5
; CHECK-NEXT: mov w5, #81 // =0x51
; CHECK-NEXT: tbz w8, #22, .LBB43_36
; CHECK-NEXT: // %bb.35: // %cond.load85
; CHECK-NEXT: mov w11, #22 // =0x16
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w11
; CHECK-NEXT: .LBB43_36: // %else86
; CHECK-NEXT: lastb w11, p3, z1.b
; CHECK-NEXT: orr x20, x20, x21
; CHECK-NEXT: ubfiz x21, x14, #13, #1
; CHECK-NEXT: ubfiz x22, x15, #25, #1
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: mov w3, #69 // =0x45
; CHECK-NEXT: lastb w13, p2, z1.b
; CHECK-NEXT: orr x24, x23, x19
; CHECK-NEXT: mov w19, #94 // =0x5e
; CHECK-NEXT: lastb w17, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x1
; CHECK-NEXT: whilels p2.b, xzr, x4
; CHECK-NEXT: mov w4, #82 // =0x52
; CHECK-NEXT: whilels p1.b, xzr, x6
; CHECK-NEXT: mov w6, #95 // =0x5f
; CHECK-NEXT: tbz w8, #23, .LBB43_38
; CHECK-NEXT: // %bb.37: // %cond.load89
; CHECK-NEXT: mov w14, #23 // =0x17
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w14
; CHECK-NEXT: .LBB43_38: // %else90
; CHECK-NEXT: lastb w14, p3, z1.b
; CHECK-NEXT: bfi x9, x10, #1, #1
; CHECK-NEXT: ubfiz x23, x16, #26, #1
; CHECK-NEXT: lastb w15, p2, z1.b
; CHECK-NEXT: lastb w1, p1, z1.b
; CHECK-NEXT: whilels p3.b, xzr, x18
; CHECK-NEXT: whilels p2.b, xzr, x5
; CHECK-NEXT: ubfiz x5, x12, #14, #1
; CHECK-NEXT: mov w12, #70 // =0x46
; CHECK-NEXT: whilels p1.b, xzr, x7
; CHECK-NEXT: orr x7, x20, x21
; CHECK-NEXT: orr x20, x24, x22
; CHECK-NEXT: tbz w8, #24, .LBB43_40
; CHECK-NEXT: // %bb.39: // %cond.load93
; CHECK-NEXT: mov w10, #24 // =0x18
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p4.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p4/m, w10
; CHECK-NEXT: .LBB43_40: // %else94
; CHECK-NEXT: lastb w10, p3, z1.b
; CHECK-NEXT: bfi x9, x11, #2, #1
; CHECK-NEXT: orr x5, x7, x5
; CHECK-NEXT: lastb w16, p2, z1.b
; CHECK-NEXT: lastb w18, p1, z1.b
; CHECK-NEXT: whilels p4.b, xzr, x3
; CHECK-NEXT: ubfiz x3, x13, #15, #1
; CHECK-NEXT: whilels p2.b, xzr, x4
; CHECK-NEXT: ubfiz x4, x17, #27, #1
; CHECK-NEXT: whilels p1.b, xzr, x19
; CHECK-NEXT: whilels p3.b, xzr, x6
; CHECK-NEXT: orr x6, x20, x23
; CHECK-NEXT: tbz w8, #25, .LBB43_42
; CHECK-NEXT: // %bb.41: // %cond.load97
; CHECK-NEXT: mov w11, #25 // =0x19
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p5.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p5/m, w11
; CHECK-NEXT: .LBB43_42: // %else98
; CHECK-NEXT: lastb w11, p4, z1.b
; CHECK-NEXT: bfi x9, x14, #3, #1
; CHECK-NEXT: ubfiz x15, x15, #16, #1
; CHECK-NEXT: ubfiz x1, x1, #28, #1
; CHECK-NEXT: orr x3, x5, x3
; CHECK-NEXT: orr x4, x6, x4
; CHECK-NEXT: lastb w13, p2, z1.b
; CHECK-NEXT: mov w14, #96 // =0x60
; CHECK-NEXT: lastb w17, p1, z1.b
; CHECK-NEXT: lastb w7, p3, z1.b
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: tbz w8, #26, .LBB43_44
; CHECK-NEXT: // %bb.43: // %cond.load101
; CHECK-NEXT: mov w12, #26 // =0x1a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w12
; CHECK-NEXT: .LBB43_44: // %else102
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: bfi x9, x10, #4, #1
; CHECK-NEXT: ubfiz x16, x16, #17, #1
; CHECK-NEXT: ubfiz x18, x18, #29, #1
; CHECK-NEXT: lsl w10, w7, #31
; CHECK-NEXT: orr x3, x3, x15
; CHECK-NEXT: orr x1, x4, x1
; CHECK-NEXT: mov w15, #97 // =0x61
; CHECK-NEXT: tbz w8, #27, .LBB43_46
; CHECK-NEXT: // %bb.45: // %cond.load105
; CHECK-NEXT: mov w4, #27 // =0x1b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w4
; CHECK-NEXT: ldrb w4, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w4
; CHECK-NEXT: .LBB43_46: // %else106
; CHECK-NEXT: bfi x9, x11, #5, #1
; CHECK-NEXT: ubfiz x13, x13, #18, #1
; CHECK-NEXT: ubfiz x17, x17, #30, #1
; CHECK-NEXT: whilels p1.b, xzr, x14
; CHECK-NEXT: orr x16, x3, x16
; CHECK-NEXT: orr x18, x1, x18
; CHECK-NEXT: mov w11, #98 // =0x62
; CHECK-NEXT: tbz w8, #28, .LBB43_48
; CHECK-NEXT: // %bb.47: // %cond.load109
; CHECK-NEXT: mov w14, #28 // =0x1c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_48: // %else110
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: bfi x9, x12, #6, #1
; CHECK-NEXT: mov w12, #99 // =0x63
; CHECK-NEXT: whilels p1.b, xzr, x15
; CHECK-NEXT: orr x15, x16, x13
; CHECK-NEXT: orr x16, x18, x17
; CHECK-NEXT: tbz w8, #29, .LBB43_50
; CHECK-NEXT: // %bb.49: // %cond.load113
; CHECK-NEXT: mov w13, #29 // =0x1d
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_50: // %else114
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x9, x15
; CHECK-NEXT: orr x10, x16, x10
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: tbz w8, #30, .LBB43_52
; CHECK-NEXT: // %bb.51: // %cond.load117
; CHECK-NEXT: mov w14, #30 // =0x1e
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_52: // %else118
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #101 // =0x65
; CHECK-NEXT: tbz w8, #31, .LBB43_54
; CHECK-NEXT: // %bb.53: // %cond.load121
; CHECK-NEXT: mov w13, #31 // =0x1f
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_54: // %else122
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #32
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #102 // =0x66
; CHECK-NEXT: tbz x8, #32, .LBB43_56
; CHECK-NEXT: // %bb.55: // %cond.load125
; CHECK-NEXT: mov w14, #32 // =0x20
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_56: // %else126
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #33
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #103 // =0x67
; CHECK-NEXT: tbz x8, #33, .LBB43_58
; CHECK-NEXT: // %bb.57: // %cond.load129
; CHECK-NEXT: mov w13, #33 // =0x21
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_58: // %else130
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #34
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #104 // =0x68
; CHECK-NEXT: tbz x8, #34, .LBB43_60
; CHECK-NEXT: // %bb.59: // %cond.load133
; CHECK-NEXT: mov w14, #34 // =0x22
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_60: // %else134
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #35
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #105 // =0x69
; CHECK-NEXT: tbz x8, #35, .LBB43_62
; CHECK-NEXT: // %bb.61: // %cond.load137
; CHECK-NEXT: mov w13, #35 // =0x23
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_62: // %else138
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #36
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #106 // =0x6a
; CHECK-NEXT: tbz x8, #36, .LBB43_64
; CHECK-NEXT: // %bb.63: // %cond.load141
; CHECK-NEXT: mov w14, #36 // =0x24
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_64: // %else142
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #37
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #107 // =0x6b
; CHECK-NEXT: tbz x8, #37, .LBB43_66
; CHECK-NEXT: // %bb.65: // %cond.load145
; CHECK-NEXT: mov w13, #37 // =0x25
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_66: // %else146
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #38
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #108 // =0x6c
; CHECK-NEXT: tbz x8, #38, .LBB43_68
; CHECK-NEXT: // %bb.67: // %cond.load149
; CHECK-NEXT: mov w14, #38 // =0x26
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_68: // %else150
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #39
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #109 // =0x6d
; CHECK-NEXT: tbz x8, #39, .LBB43_70
; CHECK-NEXT: // %bb.69: // %cond.load153
; CHECK-NEXT: mov w13, #39 // =0x27
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_70: // %else154
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #40
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #110 // =0x6e
; CHECK-NEXT: tbz x8, #40, .LBB43_72
; CHECK-NEXT: // %bb.71: // %cond.load157
; CHECK-NEXT: mov w14, #40 // =0x28
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_72: // %else158
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #41
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #111 // =0x6f
; CHECK-NEXT: tbz x8, #41, .LBB43_74
; CHECK-NEXT: // %bb.73: // %cond.load161
; CHECK-NEXT: mov w13, #41 // =0x29
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_74: // %else162
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #42
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #112 // =0x70
; CHECK-NEXT: tbz x8, #42, .LBB43_76
; CHECK-NEXT: // %bb.75: // %cond.load165
; CHECK-NEXT: mov w14, #42 // =0x2a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_76: // %else166
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #43
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #113 // =0x71
; CHECK-NEXT: tbz x8, #43, .LBB43_78
; CHECK-NEXT: // %bb.77: // %cond.load169
; CHECK-NEXT: mov w13, #43 // =0x2b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_78: // %else170
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #44
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #114 // =0x72
; CHECK-NEXT: tbz x8, #44, .LBB43_80
; CHECK-NEXT: // %bb.79: // %cond.load173
; CHECK-NEXT: mov w14, #44 // =0x2c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_80: // %else174
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #45
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #115 // =0x73
; CHECK-NEXT: tbz x8, #45, .LBB43_82
; CHECK-NEXT: // %bb.81: // %cond.load177
; CHECK-NEXT: mov w13, #45 // =0x2d
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_82: // %else178
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #46
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #116 // =0x74
; CHECK-NEXT: tbz x8, #46, .LBB43_84
; CHECK-NEXT: // %bb.83: // %cond.load181
; CHECK-NEXT: mov w14, #46 // =0x2e
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_84: // %else182
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #47
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #117 // =0x75
; CHECK-NEXT: tbz x8, #47, .LBB43_86
; CHECK-NEXT: // %bb.85: // %cond.load185
; CHECK-NEXT: mov w13, #47 // =0x2f
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_86: // %else186
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #48
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #118 // =0x76
; CHECK-NEXT: tbz x8, #48, .LBB43_88
; CHECK-NEXT: // %bb.87: // %cond.load189
; CHECK-NEXT: mov w14, #48 // =0x30
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_88: // %else190
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #49
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #119 // =0x77
; CHECK-NEXT: tbz x8, #49, .LBB43_90
; CHECK-NEXT: // %bb.89: // %cond.load193
; CHECK-NEXT: mov w13, #49 // =0x31
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_90: // %else194
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #50
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #120 // =0x78
; CHECK-NEXT: tbz x8, #50, .LBB43_92
; CHECK-NEXT: // %bb.91: // %cond.load197
; CHECK-NEXT: mov w14, #50 // =0x32
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_92: // %else198
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #51
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #121 // =0x79
; CHECK-NEXT: tbz x8, #51, .LBB43_94
; CHECK-NEXT: // %bb.93: // %cond.load201
; CHECK-NEXT: mov w13, #51 // =0x33
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_94: // %else202
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #52
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #122 // =0x7a
; CHECK-NEXT: tbz x8, #52, .LBB43_96
; CHECK-NEXT: // %bb.95: // %cond.load205
; CHECK-NEXT: mov w14, #52 // =0x34
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_96: // %else206
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #53
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #123 // =0x7b
; CHECK-NEXT: tbz x8, #53, .LBB43_98
; CHECK-NEXT: // %bb.97: // %cond.load209
; CHECK-NEXT: mov w13, #53 // =0x35
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_98: // %else210
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #54
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #124 // =0x7c
; CHECK-NEXT: tbz x8, #54, .LBB43_100
; CHECK-NEXT: // %bb.99: // %cond.load213
; CHECK-NEXT: mov w14, #54 // =0x36
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_100: // %else214
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #55
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: mov w12, #125 // =0x7d
; CHECK-NEXT: tbz x8, #55, .LBB43_102
; CHECK-NEXT: // %bb.101: // %cond.load217
; CHECK-NEXT: mov w13, #55 // =0x37
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w13
; CHECK-NEXT: ldrb w13, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w13
; CHECK-NEXT: .LBB43_102: // %else218
; CHECK-NEXT: lastb w13, p1, z1.b
; CHECK-NEXT: orr x15, x15, x9, lsl #56
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: mov w11, #126 // =0x7e
; CHECK-NEXT: tbz x8, #56, .LBB43_104
; CHECK-NEXT: // %bb.103: // %cond.load221
; CHECK-NEXT: mov w14, #56 // =0x38
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_104: // %else222
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x15, x15, x10, lsl #57
; CHECK-NEXT: and w10, w13, #0x1
; CHECK-NEXT: whilels p1.b, xzr, x12
; CHECK-NEXT: tbz x8, #57, .LBB43_106
; CHECK-NEXT: // %bb.105: // %cond.load225
; CHECK-NEXT: mov w12, #57 // =0x39
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w12
; CHECK-NEXT: .LBB43_106: // %else226
; CHECK-NEXT: lastb w12, p1, z1.b
; CHECK-NEXT: orr x13, x15, x9, lsl #58
; CHECK-NEXT: mov w9, #127 // =0x7f
; CHECK-NEXT: whilels p1.b, xzr, x11
; CHECK-NEXT: and w11, w14, #0x1
; CHECK-NEXT: tbz x8, #58, .LBB43_108
; CHECK-NEXT: // %bb.107: // %cond.load229
; CHECK-NEXT: mov w14, #58 // =0x3a
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w14
; CHECK-NEXT: ldrb w14, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w14
; CHECK-NEXT: .LBB43_108: // %else230
; CHECK-NEXT: lastb w14, p1, z1.b
; CHECK-NEXT: orr x13, x13, x10, lsl #59
; CHECK-NEXT: and w10, w12, #0x1
; CHECK-NEXT: tbz x8, #59, .LBB43_110
; CHECK-NEXT: // %bb.109: // %cond.load233
; CHECK-NEXT: mov w12, #59 // =0x3b
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w12
; CHECK-NEXT: .LBB43_110: // %else234
; CHECK-NEXT: orr x12, x13, x11, lsl #60
; CHECK-NEXT: whilels p1.b, xzr, x9
; CHECK-NEXT: and w9, w14, #0x1
; CHECK-NEXT: tbz x8, #60, .LBB43_112
; CHECK-NEXT: // %bb.111: // %cond.load237
; CHECK-NEXT: mov w11, #60 // =0x3c
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w11
; CHECK-NEXT: ldrb w11, [x0], #1
; CHECK-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p2/m, w11
; CHECK-NEXT: .LBB43_112: // %else238
; CHECK-NEXT: lastb w11, p1, z1.b
; CHECK-NEXT: orr x10, x12, x10, lsl #61
; CHECK-NEXT: tbnz x8, #61, .LBB43_191
; CHECK-NEXT: // %bb.113: // %else242
; CHECK-NEXT: orr x9, x10, x9, lsl #62
; CHECK-NEXT: tbnz x8, #62, .LBB43_192
; CHECK-NEXT: .LBB43_114: // %else246
; CHECK-NEXT: orr x9, x9, x11, lsl #63
; CHECK-NEXT: tbnz x8, #63, .LBB43_193
; CHECK-NEXT: .LBB43_115: // %else250
; CHECK-NEXT: tbnz w9, #0, .LBB43_194
; CHECK-NEXT: .LBB43_116: // %else254
; CHECK-NEXT: tbnz w9, #1, .LBB43_195
; CHECK-NEXT: .LBB43_117: // %else258
; CHECK-NEXT: tbnz w9, #2, .LBB43_196
; CHECK-NEXT: .LBB43_118: // %else262
; CHECK-NEXT: tbnz w9, #3, .LBB43_197
; CHECK-NEXT: .LBB43_119: // %else266
; CHECK-NEXT: tbnz w9, #4, .LBB43_198
; CHECK-NEXT: .LBB43_120: // %else270
; CHECK-NEXT: tbnz w9, #5, .LBB43_199
; CHECK-NEXT: .LBB43_121: // %else274
; CHECK-NEXT: tbnz w9, #6, .LBB43_200
; CHECK-NEXT: .LBB43_122: // %else278
; CHECK-NEXT: tbnz w9, #7, .LBB43_201
; CHECK-NEXT: .LBB43_123: // %else282
; CHECK-NEXT: tbnz w9, #8, .LBB43_202
; CHECK-NEXT: .LBB43_124: // %else286
; CHECK-NEXT: tbnz w9, #9, .LBB43_203
; CHECK-NEXT: .LBB43_125: // %else290
; CHECK-NEXT: tbnz w9, #10, .LBB43_204
; CHECK-NEXT: .LBB43_126: // %else294
; CHECK-NEXT: tbnz w9, #11, .LBB43_205
; CHECK-NEXT: .LBB43_127: // %else298
; CHECK-NEXT: tbnz w9, #12, .LBB43_206
; CHECK-NEXT: .LBB43_128: // %else302
; CHECK-NEXT: tbnz w9, #13, .LBB43_207
; CHECK-NEXT: .LBB43_129: // %else306
; CHECK-NEXT: tbnz w9, #14, .LBB43_208
; CHECK-NEXT: .LBB43_130: // %else310
; CHECK-NEXT: tbnz w9, #15, .LBB43_209
; CHECK-NEXT: .LBB43_131: // %else314
; CHECK-NEXT: tbnz w9, #16, .LBB43_210
; CHECK-NEXT: .LBB43_132: // %else318
; CHECK-NEXT: tbnz w9, #17, .LBB43_211
; CHECK-NEXT: .LBB43_133: // %else322
; CHECK-NEXT: tbnz w9, #18, .LBB43_212
; CHECK-NEXT: .LBB43_134: // %else326
; CHECK-NEXT: tbnz w9, #19, .LBB43_213
; CHECK-NEXT: .LBB43_135: // %else330
; CHECK-NEXT: tbnz w9, #20, .LBB43_214
; CHECK-NEXT: .LBB43_136: // %else334
; CHECK-NEXT: tbnz w9, #21, .LBB43_215
; CHECK-NEXT: .LBB43_137: // %else338
; CHECK-NEXT: tbnz w9, #22, .LBB43_216
; CHECK-NEXT: .LBB43_138: // %else342
; CHECK-NEXT: tbnz w9, #23, .LBB43_217
; CHECK-NEXT: .LBB43_139: // %else346
; CHECK-NEXT: tbnz w9, #24, .LBB43_218
; CHECK-NEXT: .LBB43_140: // %else350
; CHECK-NEXT: tbnz w9, #25, .LBB43_219
; CHECK-NEXT: .LBB43_141: // %else354
; CHECK-NEXT: tbnz w9, #26, .LBB43_220
; CHECK-NEXT: .LBB43_142: // %else358
; CHECK-NEXT: tbnz w9, #27, .LBB43_221
; CHECK-NEXT: .LBB43_143: // %else362
; CHECK-NEXT: tbnz w9, #28, .LBB43_222
; CHECK-NEXT: .LBB43_144: // %else366
; CHECK-NEXT: tbnz w9, #29, .LBB43_223
; CHECK-NEXT: .LBB43_145: // %else370
; CHECK-NEXT: tbnz w9, #30, .LBB43_224
; CHECK-NEXT: .LBB43_146: // %else374
; CHECK-NEXT: tbnz w9, #31, .LBB43_225
; CHECK-NEXT: .LBB43_147: // %else378
; CHECK-NEXT: tbnz x9, #32, .LBB43_226
; CHECK-NEXT: .LBB43_148: // %else382
; CHECK-NEXT: tbnz x9, #33, .LBB43_227
; CHECK-NEXT: .LBB43_149: // %else386
; CHECK-NEXT: tbnz x9, #34, .LBB43_228
; CHECK-NEXT: .LBB43_150: // %else390
; CHECK-NEXT: tbnz x9, #35, .LBB43_229
; CHECK-NEXT: .LBB43_151: // %else394
; CHECK-NEXT: tbnz x9, #36, .LBB43_230
; CHECK-NEXT: .LBB43_152: // %else398
; CHECK-NEXT: tbnz x9, #37, .LBB43_231
; CHECK-NEXT: .LBB43_153: // %else402
; CHECK-NEXT: tbnz x9, #38, .LBB43_232
; CHECK-NEXT: .LBB43_154: // %else406
; CHECK-NEXT: tbnz x9, #39, .LBB43_233
; CHECK-NEXT: .LBB43_155: // %else410
; CHECK-NEXT: tbnz x9, #40, .LBB43_234
; CHECK-NEXT: .LBB43_156: // %else414
; CHECK-NEXT: tbnz x9, #41, .LBB43_235
; CHECK-NEXT: .LBB43_157: // %else418
; CHECK-NEXT: tbnz x9, #42, .LBB43_236
; CHECK-NEXT: .LBB43_158: // %else422
; CHECK-NEXT: tbnz x9, #43, .LBB43_237
; CHECK-NEXT: .LBB43_159: // %else426
; CHECK-NEXT: tbnz x9, #44, .LBB43_238
; CHECK-NEXT: .LBB43_160: // %else430
; CHECK-NEXT: tbnz x9, #45, .LBB43_239
; CHECK-NEXT: .LBB43_161: // %else434
; CHECK-NEXT: tbnz x9, #46, .LBB43_240
; CHECK-NEXT: .LBB43_162: // %else438
; CHECK-NEXT: tbnz x9, #47, .LBB43_241
; CHECK-NEXT: .LBB43_163: // %else442
; CHECK-NEXT: tbnz x9, #48, .LBB43_242
; CHECK-NEXT: .LBB43_164: // %else446
; CHECK-NEXT: tbnz x9, #49, .LBB43_243
; CHECK-NEXT: .LBB43_165: // %else450
; CHECK-NEXT: tbnz x9, #50, .LBB43_244
; CHECK-NEXT: .LBB43_166: // %else454
; CHECK-NEXT: tbnz x9, #51, .LBB43_245
; CHECK-NEXT: .LBB43_167: // %else458
; CHECK-NEXT: tbnz x9, #52, .LBB43_246
; CHECK-NEXT: .LBB43_168: // %else462
; CHECK-NEXT: tbnz x9, #53, .LBB43_247
; CHECK-NEXT: .LBB43_169: // %else466
; CHECK-NEXT: tbnz x9, #54, .LBB43_248
; CHECK-NEXT: .LBB43_170: // %else470
; CHECK-NEXT: tbnz x9, #55, .LBB43_249
; CHECK-NEXT: .LBB43_171: // %else474
; CHECK-NEXT: tbnz x9, #56, .LBB43_250
; CHECK-NEXT: .LBB43_172: // %else478
; CHECK-NEXT: tbnz x9, #57, .LBB43_251
; CHECK-NEXT: .LBB43_173: // %else482
; CHECK-NEXT: tbnz x9, #58, .LBB43_252
; CHECK-NEXT: .LBB43_174: // %else486
; CHECK-NEXT: tbnz x9, #59, .LBB43_253
; CHECK-NEXT: .LBB43_175: // %else490
; CHECK-NEXT: tbnz x9, #60, .LBB43_254
; CHECK-NEXT: .LBB43_176: // %else494
; CHECK-NEXT: tbnz x9, #61, .LBB43_255
; CHECK-NEXT: .LBB43_177: // %else498
; CHECK-NEXT: tbnz x9, #62, .LBB43_256
; CHECK-NEXT: .LBB43_178: // %else502
; CHECK-NEXT: tbz x9, #63, .LBB43_180
; CHECK-NEXT: .LBB43_179: // %cond.load505
; CHECK-NEXT: mov w8, #127 // =0x7f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB43_180: // %else506
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB43_181: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB43_6
; CHECK-NEXT: .LBB43_182: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB43_7
; CHECK-NEXT: .LBB43_183: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB43_8
; CHECK-NEXT: .LBB43_184: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB43_9
; CHECK-NEXT: .LBB43_185: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB43_10
; CHECK-NEXT: .LBB43_186: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB43_11
; CHECK-NEXT: .LBB43_187: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB43_12
; CHECK-NEXT: .LBB43_188: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB43_13
; CHECK-NEXT: .LBB43_189: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB43_14
; CHECK-NEXT: .LBB43_190: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz w8, #12, .LBB43_15
; CHECK-NEXT: b .LBB43_16
; CHECK-NEXT: .LBB43_191: // %cond.load241
; CHECK-NEXT: mov w12, #61 // =0x3d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w12
; CHECK-NEXT: ldrb w12, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w12
; CHECK-NEXT: orr x9, x10, x9, lsl #62
; CHECK-NEXT: tbz x8, #62, .LBB43_114
; CHECK-NEXT: .LBB43_192: // %cond.load245
; CHECK-NEXT: mov w10, #62 // =0x3e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w10
; CHECK-NEXT: ldrb w10, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w10
; CHECK-NEXT: orr x9, x9, x11, lsl #63
; CHECK-NEXT: tbz x8, #63, .LBB43_115
; CHECK-NEXT: .LBB43_193: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #0, .LBB43_116
; CHECK-NEXT: .LBB43_194: // %cond.load253
; CHECK-NEXT: mov w8, #64 // =0x40
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #1, .LBB43_117
; CHECK-NEXT: .LBB43_195: // %cond.load257
; CHECK-NEXT: mov w8, #65 // =0x41
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #2, .LBB43_118
; CHECK-NEXT: .LBB43_196: // %cond.load261
; CHECK-NEXT: mov w8, #66 // =0x42
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #3, .LBB43_119
; CHECK-NEXT: .LBB43_197: // %cond.load265
; CHECK-NEXT: mov w8, #67 // =0x43
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #4, .LBB43_120
; CHECK-NEXT: .LBB43_198: // %cond.load269
; CHECK-NEXT: mov w8, #68 // =0x44
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #5, .LBB43_121
; CHECK-NEXT: .LBB43_199: // %cond.load273
; CHECK-NEXT: mov w8, #69 // =0x45
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #6, .LBB43_122
; CHECK-NEXT: .LBB43_200: // %cond.load277
; CHECK-NEXT: mov w8, #70 // =0x46
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #7, .LBB43_123
; CHECK-NEXT: .LBB43_201: // %cond.load281
; CHECK-NEXT: mov w8, #71 // =0x47
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #8, .LBB43_124
; CHECK-NEXT: .LBB43_202: // %cond.load285
; CHECK-NEXT: mov w8, #72 // =0x48
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #9, .LBB43_125
; CHECK-NEXT: .LBB43_203: // %cond.load289
; CHECK-NEXT: mov w8, #73 // =0x49
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #10, .LBB43_126
; CHECK-NEXT: .LBB43_204: // %cond.load293
; CHECK-NEXT: mov w8, #74 // =0x4a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #11, .LBB43_127
; CHECK-NEXT: .LBB43_205: // %cond.load297
; CHECK-NEXT: mov w8, #75 // =0x4b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #12, .LBB43_128
; CHECK-NEXT: .LBB43_206: // %cond.load301
; CHECK-NEXT: mov w8, #76 // =0x4c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #13, .LBB43_129
; CHECK-NEXT: .LBB43_207: // %cond.load305
; CHECK-NEXT: mov w8, #77 // =0x4d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #14, .LBB43_130
; CHECK-NEXT: .LBB43_208: // %cond.load309
; CHECK-NEXT: mov w8, #78 // =0x4e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #15, .LBB43_131
; CHECK-NEXT: .LBB43_209: // %cond.load313
; CHECK-NEXT: mov w8, #79 // =0x4f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #16, .LBB43_132
; CHECK-NEXT: .LBB43_210: // %cond.load317
; CHECK-NEXT: mov w8, #80 // =0x50
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #17, .LBB43_133
; CHECK-NEXT: .LBB43_211: // %cond.load321
; CHECK-NEXT: mov w8, #81 // =0x51
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #18, .LBB43_134
; CHECK-NEXT: .LBB43_212: // %cond.load325
; CHECK-NEXT: mov w8, #82 // =0x52
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #19, .LBB43_135
; CHECK-NEXT: .LBB43_213: // %cond.load329
; CHECK-NEXT: mov w8, #83 // =0x53
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #20, .LBB43_136
; CHECK-NEXT: .LBB43_214: // %cond.load333
; CHECK-NEXT: mov w8, #84 // =0x54
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #21, .LBB43_137
; CHECK-NEXT: .LBB43_215: // %cond.load337
; CHECK-NEXT: mov w8, #85 // =0x55
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #22, .LBB43_138
; CHECK-NEXT: .LBB43_216: // %cond.load341
; CHECK-NEXT: mov w8, #86 // =0x56
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #23, .LBB43_139
; CHECK-NEXT: .LBB43_217: // %cond.load345
; CHECK-NEXT: mov w8, #87 // =0x57
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #24, .LBB43_140
; CHECK-NEXT: .LBB43_218: // %cond.load349
; CHECK-NEXT: mov w8, #88 // =0x58
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #25, .LBB43_141
; CHECK-NEXT: .LBB43_219: // %cond.load353
; CHECK-NEXT: mov w8, #89 // =0x59
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #26, .LBB43_142
; CHECK-NEXT: .LBB43_220: // %cond.load357
; CHECK-NEXT: mov w8, #90 // =0x5a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #27, .LBB43_143
; CHECK-NEXT: .LBB43_221: // %cond.load361
; CHECK-NEXT: mov w8, #91 // =0x5b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #28, .LBB43_144
; CHECK-NEXT: .LBB43_222: // %cond.load365
; CHECK-NEXT: mov w8, #92 // =0x5c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #29, .LBB43_145
; CHECK-NEXT: .LBB43_223: // %cond.load369
; CHECK-NEXT: mov w8, #93 // =0x5d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #30, .LBB43_146
; CHECK-NEXT: .LBB43_224: // %cond.load373
; CHECK-NEXT: mov w8, #94 // =0x5e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz w9, #31, .LBB43_147
; CHECK-NEXT: .LBB43_225: // %cond.load377
; CHECK-NEXT: mov w8, #95 // =0x5f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #32, .LBB43_148
; CHECK-NEXT: .LBB43_226: // %cond.load381
; CHECK-NEXT: mov w8, #96 // =0x60
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #33, .LBB43_149
; CHECK-NEXT: .LBB43_227: // %cond.load385
; CHECK-NEXT: mov w8, #97 // =0x61
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #34, .LBB43_150
; CHECK-NEXT: .LBB43_228: // %cond.load389
; CHECK-NEXT: mov w8, #98 // =0x62
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #35, .LBB43_151
; CHECK-NEXT: .LBB43_229: // %cond.load393
; CHECK-NEXT: mov w8, #99 // =0x63
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #36, .LBB43_152
; CHECK-NEXT: .LBB43_230: // %cond.load397
; CHECK-NEXT: mov w8, #100 // =0x64
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #37, .LBB43_153
; CHECK-NEXT: .LBB43_231: // %cond.load401
; CHECK-NEXT: mov w8, #101 // =0x65
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #38, .LBB43_154
; CHECK-NEXT: .LBB43_232: // %cond.load405
; CHECK-NEXT: mov w8, #102 // =0x66
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #39, .LBB43_155
; CHECK-NEXT: .LBB43_233: // %cond.load409
; CHECK-NEXT: mov w8, #103 // =0x67
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #40, .LBB43_156
; CHECK-NEXT: .LBB43_234: // %cond.load413
; CHECK-NEXT: mov w8, #104 // =0x68
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #41, .LBB43_157
; CHECK-NEXT: .LBB43_235: // %cond.load417
; CHECK-NEXT: mov w8, #105 // =0x69
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #42, .LBB43_158
; CHECK-NEXT: .LBB43_236: // %cond.load421
; CHECK-NEXT: mov w8, #106 // =0x6a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #43, .LBB43_159
; CHECK-NEXT: .LBB43_237: // %cond.load425
; CHECK-NEXT: mov w8, #107 // =0x6b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #44, .LBB43_160
; CHECK-NEXT: .LBB43_238: // %cond.load429
; CHECK-NEXT: mov w8, #108 // =0x6c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #45, .LBB43_161
; CHECK-NEXT: .LBB43_239: // %cond.load433
; CHECK-NEXT: mov w8, #109 // =0x6d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #46, .LBB43_162
; CHECK-NEXT: .LBB43_240: // %cond.load437
; CHECK-NEXT: mov w8, #110 // =0x6e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #47, .LBB43_163
; CHECK-NEXT: .LBB43_241: // %cond.load441
; CHECK-NEXT: mov w8, #111 // =0x6f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #48, .LBB43_164
; CHECK-NEXT: .LBB43_242: // %cond.load445
; CHECK-NEXT: mov w8, #112 // =0x70
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #49, .LBB43_165
; CHECK-NEXT: .LBB43_243: // %cond.load449
; CHECK-NEXT: mov w8, #113 // =0x71
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #50, .LBB43_166
; CHECK-NEXT: .LBB43_244: // %cond.load453
; CHECK-NEXT: mov w8, #114 // =0x72
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #51, .LBB43_167
; CHECK-NEXT: .LBB43_245: // %cond.load457
; CHECK-NEXT: mov w8, #115 // =0x73
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #52, .LBB43_168
; CHECK-NEXT: .LBB43_246: // %cond.load461
; CHECK-NEXT: mov w8, #116 // =0x74
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #53, .LBB43_169
; CHECK-NEXT: .LBB43_247: // %cond.load465
; CHECK-NEXT: mov w8, #117 // =0x75
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #54, .LBB43_170
; CHECK-NEXT: .LBB43_248: // %cond.load469
; CHECK-NEXT: mov w8, #118 // =0x76
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #55, .LBB43_171
; CHECK-NEXT: .LBB43_249: // %cond.load473
; CHECK-NEXT: mov w8, #119 // =0x77
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #56, .LBB43_172
; CHECK-NEXT: .LBB43_250: // %cond.load477
; CHECK-NEXT: mov w8, #120 // =0x78
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #57, .LBB43_173
; CHECK-NEXT: .LBB43_251: // %cond.load481
; CHECK-NEXT: mov w8, #121 // =0x79
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #58, .LBB43_174
; CHECK-NEXT: .LBB43_252: // %cond.load485
; CHECK-NEXT: mov w8, #122 // =0x7a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #59, .LBB43_175
; CHECK-NEXT: .LBB43_253: // %cond.load489
; CHECK-NEXT: mov w8, #123 // =0x7b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #60, .LBB43_176
; CHECK-NEXT: .LBB43_254: // %cond.load493
; CHECK-NEXT: mov w8, #124 // =0x7c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #61, .LBB43_177
; CHECK-NEXT: .LBB43_255: // %cond.load497
; CHECK-NEXT: mov w8, #125 // =0x7d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbz x9, #62, .LBB43_178
; CHECK-NEXT: .LBB43_256: // %cond.load501
; CHECK-NEXT: mov w8, #126 // =0x7e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: tbnz x9, #63, .LBB43_179
; CHECK-NEXT: b .LBB43_180
;
; CHECK-EXPAND-LABEL: masked_load_zext_v128i8i16:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.h, vl128
; CHECK-EXPAND-NEXT: ld1b { z0.h }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.h
; CHECK-EXPAND-NEXT: whilelo p2.h, xzr, x8
; CHECK-EXPAND-NEXT: ld1b { z0.h }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.h, p1, z0.h
; CHECK-EXPAND-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <128 x i8>, ptr %bp
%mask = icmp eq <128 x i8> %b, zeroinitializer
%load = call <128 x i8> @llvm.masked.expandload.v128i8(ptr %ap, <128 x i1> %mask, <128 x i8> poison)
%ext = zext <128 x i8> %load to <128 x i16>
store <128 x i16> %ext, ptr %c
ret void
}
define void @masked_load_zext_v64i8i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v64i8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.b, vl64
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w11, v0.b[1]
; CHECK-NEXT: fmov w22, s0
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: umov w13, v0.b[3]
; CHECK-NEXT: umov w14, v0.b[7]
; CHECK-NEXT: umov w1, v0.b[8]
; CHECK-NEXT: umov w16, v0.b[9]
; CHECK-NEXT: mov z3.b, z0.b[18]
; CHECK-NEXT: mov z5.b, z0.b[19]
; CHECK-NEXT: and x22, x22, #0x1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: umov w17, v0.b[10]
; CHECK-NEXT: bfi x22, x11, #1, #1
; CHECK-NEXT: mov z6.b, z0.b[20]
; CHECK-NEXT: umov w3, v0.b[11]
; CHECK-NEXT: mov z4.b, z0.b[21]
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: mov z7.b, z0.b[22]
; CHECK-NEXT: bfi x22, x12, #2, #1
; CHECK-NEXT: fmov w19, s3
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: ubfiz x14, x14, #7, #1
; CHECK-NEXT: ubfiz x1, x1, #8, #1
; CHECK-NEXT: umov w4, v0.b[12]
; CHECK-NEXT: bfi x22, x13, #3, #1
; CHECK-NEXT: mov z16.b, z0.b[23]
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: ubfiz x16, x16, #9, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: umov w5, v0.b[13]
; CHECK-NEXT: mov z17.b, z0.b[24]
; CHECK-NEXT: fmov w23, s4
; CHECK-NEXT: orr x14, x14, x1
; CHECK-NEXT: bfi x22, x10, #4, #1
; CHECK-NEXT: ubfiz x10, x17, #10, #1
; CHECK-NEXT: mov z18.b, z0.b[25]
; CHECK-NEXT: fmov w24, s7
; CHECK-NEXT: ubfiz x13, x19, #18, #1
; CHECK-NEXT: ubfiz x19, x20, #19, #1
; CHECK-NEXT: orr x14, x14, x16
; CHECK-NEXT: ubfiz x16, x3, #11, #1
; CHECK-NEXT: umov w15, v0.b[14]
; CHECK-NEXT: mov z19.b, z0.b[26]
; CHECK-NEXT: fmov w25, s16
; CHECK-NEXT: ubfiz x1, x21, #20, #1
; CHECK-NEXT: orr x10, x14, x10
; CHECK-NEXT: bfi x22, x9, #5, #1
; CHECK-NEXT: mov z20.b, z0.b[27]
; CHECK-NEXT: fmov w26, s17
; CHECK-NEXT: orr x13, x13, x19
; CHECK-NEXT: ubfiz x9, x4, #12, #1
; CHECK-NEXT: orr x10, x10, x16
; CHECK-NEXT: ubfiz x16, x23, #21, #1
; CHECK-NEXT: umov w18, v0.b[15]
; CHECK-NEXT: mov z1.b, z0.b[16]
; CHECK-NEXT: mov z21.b, z0.b[28]
; CHECK-NEXT: fmov w11, s18
; CHECK-NEXT: orr x13, x13, x1
; CHECK-NEXT: ubfiz x14, x5, #13, #1
; CHECK-NEXT: bfi x22, x8, #6, #1
; CHECK-NEXT: ubfiz x8, x24, #22, #1
; CHECK-NEXT: mov z2.b, z0.b[17]
; CHECK-NEXT: mov z22.b, z0.b[29]
; CHECK-NEXT: fmov w27, s19
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x10, x13, x16
; CHECK-NEXT: ubfiz x13, x25, #23, #1
; CHECK-NEXT: mov z5.b, z0.b[30]
; CHECK-NEXT: fmov w28, s20
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: ubfiz x10, x15, #14, #1
; CHECK-NEXT: ubfiz x14, x26, #24, #1
; CHECK-NEXT: fmov w6, s1
; CHECK-NEXT: fmov w29, s21
; CHECK-NEXT: orr x8, x8, x13
; CHECK-NEXT: ubfiz x11, x11, #25, #1
; CHECK-NEXT: fmov w7, s2
; CHECK-NEXT: fmov w30, s22
; CHECK-NEXT: ubfiz x13, x18, #15, #1
; CHECK-NEXT: orr x9, x9, x10
; CHECK-NEXT: orr x8, x8, x14
; CHECK-NEXT: ubfiz x10, x27, #26, #1
; CHECK-NEXT: fmov w12, s5
; CHECK-NEXT: orr x8, x8, x11
; CHECK-NEXT: ubfiz x11, x28, #27, #1
; CHECK-NEXT: mov z3.b, z0.b[31]
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: ubfiz x10, x6, #16, #1
; CHECK-NEXT: ubfiz x13, x29, #28, #1
; CHECK-NEXT: orr x8, x8, x11
; CHECK-NEXT: ubfiz x11, x7, #17, #1
; CHECK-NEXT: ubfiz x14, x30, #29, #1
; CHECK-NEXT: mov z2.b, z0.b[32]
; CHECK-NEXT: orr x9, x9, x10
; CHECK-NEXT: orr x8, x8, x13
; CHECK-NEXT: ubfiz x10, x12, #30, #1
; CHECK-NEXT: fmov w12, s3
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: orr x8, x8, x14
; CHECK-NEXT: mov z1.b, z0.b[33]
; CHECK-NEXT: orr x9, x22, x9
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: lsl w10, w12, #31
; CHECK-NEXT: mov z2.b, z0.b[34]
; CHECK-NEXT: orr x8, x8, x10
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[62]
; CHECK-NEXT: mov z0.b, z0.b[63]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB44_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB44_3
; CHECK-NEXT: b .LBB44_4
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: adrp x9, .LCPI44_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI44_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB44_4
; CHECK-NEXT: .LBB44_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB44_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB44_68
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB44_69
; CHECK-NEXT: .LBB44_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB44_70
; CHECK-NEXT: .LBB44_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB44_71
; CHECK-NEXT: .LBB44_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB44_72
; CHECK-NEXT: .LBB44_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB44_73
; CHECK-NEXT: .LBB44_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB44_74
; CHECK-NEXT: .LBB44_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB44_75
; CHECK-NEXT: .LBB44_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB44_76
; CHECK-NEXT: .LBB44_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB44_77
; CHECK-NEXT: .LBB44_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB44_78
; CHECK-NEXT: .LBB44_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB44_79
; CHECK-NEXT: .LBB44_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB44_80
; CHECK-NEXT: .LBB44_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB44_81
; CHECK-NEXT: .LBB44_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB44_82
; CHECK-NEXT: .LBB44_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB44_83
; CHECK-NEXT: .LBB44_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB44_84
; CHECK-NEXT: .LBB44_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB44_85
; CHECK-NEXT: .LBB44_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB44_86
; CHECK-NEXT: .LBB44_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB44_87
; CHECK-NEXT: .LBB44_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB44_88
; CHECK-NEXT: .LBB44_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB44_89
; CHECK-NEXT: .LBB44_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB44_90
; CHECK-NEXT: .LBB44_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB44_91
; CHECK-NEXT: .LBB44_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB44_92
; CHECK-NEXT: .LBB44_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB44_93
; CHECK-NEXT: .LBB44_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB44_94
; CHECK-NEXT: .LBB44_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB44_95
; CHECK-NEXT: .LBB44_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB44_96
; CHECK-NEXT: .LBB44_33: // %else118
; CHECK-NEXT: tbnz w8, #31, .LBB44_97
; CHECK-NEXT: .LBB44_34: // %else122
; CHECK-NEXT: tbnz x8, #32, .LBB44_98
; CHECK-NEXT: .LBB44_35: // %else126
; CHECK-NEXT: tbnz x8, #33, .LBB44_99
; CHECK-NEXT: .LBB44_36: // %else130
; CHECK-NEXT: tbnz x8, #34, .LBB44_100
; CHECK-NEXT: .LBB44_37: // %else134
; CHECK-NEXT: tbnz x8, #35, .LBB44_101
; CHECK-NEXT: .LBB44_38: // %else138
; CHECK-NEXT: tbnz x8, #36, .LBB44_102
; CHECK-NEXT: .LBB44_39: // %else142
; CHECK-NEXT: tbnz x8, #37, .LBB44_103
; CHECK-NEXT: .LBB44_40: // %else146
; CHECK-NEXT: tbnz x8, #38, .LBB44_104
; CHECK-NEXT: .LBB44_41: // %else150
; CHECK-NEXT: tbnz x8, #39, .LBB44_105
; CHECK-NEXT: .LBB44_42: // %else154
; CHECK-NEXT: tbnz x8, #40, .LBB44_106
; CHECK-NEXT: .LBB44_43: // %else158
; CHECK-NEXT: tbnz x8, #41, .LBB44_107
; CHECK-NEXT: .LBB44_44: // %else162
; CHECK-NEXT: tbnz x8, #42, .LBB44_108
; CHECK-NEXT: .LBB44_45: // %else166
; CHECK-NEXT: tbnz x8, #43, .LBB44_109
; CHECK-NEXT: .LBB44_46: // %else170
; CHECK-NEXT: tbnz x8, #44, .LBB44_110
; CHECK-NEXT: .LBB44_47: // %else174
; CHECK-NEXT: tbnz x8, #45, .LBB44_111
; CHECK-NEXT: .LBB44_48: // %else178
; CHECK-NEXT: tbnz x8, #46, .LBB44_112
; CHECK-NEXT: .LBB44_49: // %else182
; CHECK-NEXT: tbnz x8, #47, .LBB44_113
; CHECK-NEXT: .LBB44_50: // %else186
; CHECK-NEXT: tbnz x8, #48, .LBB44_114
; CHECK-NEXT: .LBB44_51: // %else190
; CHECK-NEXT: tbnz x8, #49, .LBB44_115
; CHECK-NEXT: .LBB44_52: // %else194
; CHECK-NEXT: tbnz x8, #50, .LBB44_116
; CHECK-NEXT: .LBB44_53: // %else198
; CHECK-NEXT: tbnz x8, #51, .LBB44_117
; CHECK-NEXT: .LBB44_54: // %else202
; CHECK-NEXT: tbnz x8, #52, .LBB44_118
; CHECK-NEXT: .LBB44_55: // %else206
; CHECK-NEXT: tbnz x8, #53, .LBB44_119
; CHECK-NEXT: .LBB44_56: // %else210
; CHECK-NEXT: tbnz x8, #54, .LBB44_120
; CHECK-NEXT: .LBB44_57: // %else214
; CHECK-NEXT: tbnz x8, #55, .LBB44_121
; CHECK-NEXT: .LBB44_58: // %else218
; CHECK-NEXT: tbnz x8, #56, .LBB44_122
; CHECK-NEXT: .LBB44_59: // %else222
; CHECK-NEXT: tbnz x8, #57, .LBB44_123
; CHECK-NEXT: .LBB44_60: // %else226
; CHECK-NEXT: tbnz x8, #58, .LBB44_124
; CHECK-NEXT: .LBB44_61: // %else230
; CHECK-NEXT: tbnz x8, #59, .LBB44_125
; CHECK-NEXT: .LBB44_62: // %else234
; CHECK-NEXT: tbnz x8, #60, .LBB44_126
; CHECK-NEXT: .LBB44_63: // %else238
; CHECK-NEXT: tbnz x8, #61, .LBB44_127
; CHECK-NEXT: .LBB44_64: // %else242
; CHECK-NEXT: tbnz x8, #62, .LBB44_128
; CHECK-NEXT: .LBB44_65: // %else246
; CHECK-NEXT: tbz x8, #63, .LBB44_67
; CHECK-NEXT: .LBB44_66: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB44_67: // %else250
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB44_68: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB44_6
; CHECK-NEXT: .LBB44_69: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB44_7
; CHECK-NEXT: .LBB44_70: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB44_8
; CHECK-NEXT: .LBB44_71: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB44_9
; CHECK-NEXT: .LBB44_72: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB44_10
; CHECK-NEXT: .LBB44_73: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB44_11
; CHECK-NEXT: .LBB44_74: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB44_12
; CHECK-NEXT: .LBB44_75: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB44_13
; CHECK-NEXT: .LBB44_76: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB44_14
; CHECK-NEXT: .LBB44_77: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB44_15
; CHECK-NEXT: .LBB44_78: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB44_16
; CHECK-NEXT: .LBB44_79: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB44_17
; CHECK-NEXT: .LBB44_80: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB44_18
; CHECK-NEXT: .LBB44_81: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB44_19
; CHECK-NEXT: .LBB44_82: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB44_20
; CHECK-NEXT: .LBB44_83: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB44_21
; CHECK-NEXT: .LBB44_84: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB44_22
; CHECK-NEXT: .LBB44_85: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB44_23
; CHECK-NEXT: .LBB44_86: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB44_24
; CHECK-NEXT: .LBB44_87: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB44_25
; CHECK-NEXT: .LBB44_88: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB44_26
; CHECK-NEXT: .LBB44_89: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB44_27
; CHECK-NEXT: .LBB44_90: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB44_28
; CHECK-NEXT: .LBB44_91: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB44_29
; CHECK-NEXT: .LBB44_92: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB44_30
; CHECK-NEXT: .LBB44_93: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB44_31
; CHECK-NEXT: .LBB44_94: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB44_32
; CHECK-NEXT: .LBB44_95: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB44_33
; CHECK-NEXT: .LBB44_96: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #31, .LBB44_34
; CHECK-NEXT: .LBB44_97: // %cond.load121
; CHECK-NEXT: mov w9, #31 // =0x1f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #32, .LBB44_35
; CHECK-NEXT: .LBB44_98: // %cond.load125
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #33, .LBB44_36
; CHECK-NEXT: .LBB44_99: // %cond.load129
; CHECK-NEXT: mov w9, #33 // =0x21
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #34, .LBB44_37
; CHECK-NEXT: .LBB44_100: // %cond.load133
; CHECK-NEXT: mov w9, #34 // =0x22
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #35, .LBB44_38
; CHECK-NEXT: .LBB44_101: // %cond.load137
; CHECK-NEXT: mov w9, #35 // =0x23
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #36, .LBB44_39
; CHECK-NEXT: .LBB44_102: // %cond.load141
; CHECK-NEXT: mov w9, #36 // =0x24
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #37, .LBB44_40
; CHECK-NEXT: .LBB44_103: // %cond.load145
; CHECK-NEXT: mov w9, #37 // =0x25
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #38, .LBB44_41
; CHECK-NEXT: .LBB44_104: // %cond.load149
; CHECK-NEXT: mov w9, #38 // =0x26
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #39, .LBB44_42
; CHECK-NEXT: .LBB44_105: // %cond.load153
; CHECK-NEXT: mov w9, #39 // =0x27
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #40, .LBB44_43
; CHECK-NEXT: .LBB44_106: // %cond.load157
; CHECK-NEXT: mov w9, #40 // =0x28
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #41, .LBB44_44
; CHECK-NEXT: .LBB44_107: // %cond.load161
; CHECK-NEXT: mov w9, #41 // =0x29
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #42, .LBB44_45
; CHECK-NEXT: .LBB44_108: // %cond.load165
; CHECK-NEXT: mov w9, #42 // =0x2a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #43, .LBB44_46
; CHECK-NEXT: .LBB44_109: // %cond.load169
; CHECK-NEXT: mov w9, #43 // =0x2b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #44, .LBB44_47
; CHECK-NEXT: .LBB44_110: // %cond.load173
; CHECK-NEXT: mov w9, #44 // =0x2c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #45, .LBB44_48
; CHECK-NEXT: .LBB44_111: // %cond.load177
; CHECK-NEXT: mov w9, #45 // =0x2d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #46, .LBB44_49
; CHECK-NEXT: .LBB44_112: // %cond.load181
; CHECK-NEXT: mov w9, #46 // =0x2e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #47, .LBB44_50
; CHECK-NEXT: .LBB44_113: // %cond.load185
; CHECK-NEXT: mov w9, #47 // =0x2f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #48, .LBB44_51
; CHECK-NEXT: .LBB44_114: // %cond.load189
; CHECK-NEXT: mov w9, #48 // =0x30
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #49, .LBB44_52
; CHECK-NEXT: .LBB44_115: // %cond.load193
; CHECK-NEXT: mov w9, #49 // =0x31
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #50, .LBB44_53
; CHECK-NEXT: .LBB44_116: // %cond.load197
; CHECK-NEXT: mov w9, #50 // =0x32
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #51, .LBB44_54
; CHECK-NEXT: .LBB44_117: // %cond.load201
; CHECK-NEXT: mov w9, #51 // =0x33
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #52, .LBB44_55
; CHECK-NEXT: .LBB44_118: // %cond.load205
; CHECK-NEXT: mov w9, #52 // =0x34
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #53, .LBB44_56
; CHECK-NEXT: .LBB44_119: // %cond.load209
; CHECK-NEXT: mov w9, #53 // =0x35
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #54, .LBB44_57
; CHECK-NEXT: .LBB44_120: // %cond.load213
; CHECK-NEXT: mov w9, #54 // =0x36
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #55, .LBB44_58
; CHECK-NEXT: .LBB44_121: // %cond.load217
; CHECK-NEXT: mov w9, #55 // =0x37
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #56, .LBB44_59
; CHECK-NEXT: .LBB44_122: // %cond.load221
; CHECK-NEXT: mov w9, #56 // =0x38
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #57, .LBB44_60
; CHECK-NEXT: .LBB44_123: // %cond.load225
; CHECK-NEXT: mov w9, #57 // =0x39
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #58, .LBB44_61
; CHECK-NEXT: .LBB44_124: // %cond.load229
; CHECK-NEXT: mov w9, #58 // =0x3a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #59, .LBB44_62
; CHECK-NEXT: .LBB44_125: // %cond.load233
; CHECK-NEXT: mov w9, #59 // =0x3b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #60, .LBB44_63
; CHECK-NEXT: .LBB44_126: // %cond.load237
; CHECK-NEXT: mov w9, #60 // =0x3c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #61, .LBB44_64
; CHECK-NEXT: .LBB44_127: // %cond.load241
; CHECK-NEXT: mov w9, #61 // =0x3d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz x8, #62, .LBB44_65
; CHECK-NEXT: .LBB44_128: // %cond.load245
; CHECK-NEXT: mov w9, #62 // =0x3e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz x8, #63, .LBB44_66
; CHECK-NEXT: b .LBB44_67
;
; CHECK-EXPAND-LABEL: masked_load_zext_v64i8i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl64
; CHECK-EXPAND-NEXT: ld1b { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1b { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <64 x i8>, ptr %bp
%mask = icmp eq <64 x i8> %b, zeroinitializer
%load = call <64 x i8> @llvm.masked.expandload.v64i8(ptr %ap, <64 x i1> %mask, <64 x i8> poison)
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr %c
ret void
}
define void @masked_load_zext_v32i8i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umov w13, v0.b[1]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w4, v0.b[7]
; CHECK-NEXT: umov w5, v0.b[8]
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: umov w3, v0.b[9]
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: umov w1, v0.b[10]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: bfi w6, w13, #1, #1
; CHECK-NEXT: umov w18, v0.b[11]
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: ubfiz w13, w4, #7, #1
; CHECK-NEXT: ubfiz w4, w5, #8, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: bfi w6, w12, #2, #1
; CHECK-NEXT: umov w16, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: orr w12, w13, w4
; CHECK-NEXT: ubfiz w13, w3, #9, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w17, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: ubfiz w11, w1, #10, #1
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: ubfiz w3, w20, #18, #1
; CHECK-NEXT: ubfiz w4, w21, #19, #1
; CHECK-NEXT: orr w12, w12, w13
; CHECK-NEXT: ubfiz w13, w18, #11, #1
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: ubfiz w1, w22, #20, #1
; CHECK-NEXT: orr w11, w12, w11
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w3, w3, w4
; CHECK-NEXT: orr w11, w11, w13
; CHECK-NEXT: ubfiz w12, w16, #12, #1
; CHECK-NEXT: ubfiz w13, w23, #21, #1
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: orr w10, w3, w1
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: ubfiz w9, w17, #13, #1
; CHECK-NEXT: ubfiz w16, w24, #22, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w5, s21
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: ubfiz w12, w25, #23, #1
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: orr w9, w11, w9
; CHECK-NEXT: orr w10, w10, w16
; CHECK-NEXT: ubfiz w11, w26, #24, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: fmov w28, s22
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w14, #14, #1
; CHECK-NEXT: ubfiz w13, w27, #25, #1
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w29, s23
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: ubfiz w14, w5, #26, #1
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: fmov w30, s24
; CHECK-NEXT: ubfiz w11, w15, #15, #1
; CHECK-NEXT: bfi w6, w8, #6, #1
; CHECK-NEXT: orr w8, w9, w12
; CHECK-NEXT: orr w9, w10, w13
; CHECK-NEXT: orr w9, w9, w14
; CHECK-NEXT: ubfiz w10, w28, #27, #1
; CHECK-NEXT: fmov w14, s2
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: ubfiz w11, w7, #16, #1
; CHECK-NEXT: ubfiz w13, w29, #28, #1
; CHECK-NEXT: ubfiz w12, w19, #17, #1
; CHECK-NEXT: orr w9, w9, w10
; CHECK-NEXT: ubfiz w10, w30, #29, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w9, w9, w13
; CHECK-NEXT: ubfiz w11, w14, #30, #1
; CHECK-NEXT: orr w8, w8, w12
; CHECK-NEXT: orr w9, w9, w10
; CHECK-NEXT: orr w8, w6, w8
; CHECK-NEXT: orr w9, w9, w11
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB45_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: tbnz w8, #1, .LBB45_3
; CHECK-NEXT: b .LBB45_4
; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: adrp x9, .LCPI45_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI45_0
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB45_4
; CHECK-NEXT: .LBB45_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: .LBB45_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB45_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB45_37
; CHECK-NEXT: .LBB45_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB45_38
; CHECK-NEXT: .LBB45_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB45_39
; CHECK-NEXT: .LBB45_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB45_40
; CHECK-NEXT: .LBB45_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB45_41
; CHECK-NEXT: .LBB45_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB45_42
; CHECK-NEXT: .LBB45_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB45_43
; CHECK-NEXT: .LBB45_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB45_44
; CHECK-NEXT: .LBB45_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB45_45
; CHECK-NEXT: .LBB45_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB45_46
; CHECK-NEXT: .LBB45_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB45_47
; CHECK-NEXT: .LBB45_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB45_48
; CHECK-NEXT: .LBB45_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB45_49
; CHECK-NEXT: .LBB45_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB45_50
; CHECK-NEXT: .LBB45_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB45_51
; CHECK-NEXT: .LBB45_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB45_52
; CHECK-NEXT: .LBB45_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB45_53
; CHECK-NEXT: .LBB45_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB45_54
; CHECK-NEXT: .LBB45_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB45_55
; CHECK-NEXT: .LBB45_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB45_56
; CHECK-NEXT: .LBB45_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB45_57
; CHECK-NEXT: .LBB45_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB45_58
; CHECK-NEXT: .LBB45_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB45_59
; CHECK-NEXT: .LBB45_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB45_60
; CHECK-NEXT: .LBB45_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB45_61
; CHECK-NEXT: .LBB45_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB45_62
; CHECK-NEXT: .LBB45_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB45_63
; CHECK-NEXT: .LBB45_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB45_64
; CHECK-NEXT: .LBB45_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB45_35
; CHECK-NEXT: .LBB45_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w8
; CHECK-NEXT: .LBB45_35: // %else122
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB45_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB45_6
; CHECK-NEXT: .LBB45_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB45_7
; CHECK-NEXT: .LBB45_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB45_8
; CHECK-NEXT: .LBB45_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB45_9
; CHECK-NEXT: .LBB45_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB45_10
; CHECK-NEXT: .LBB45_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB45_11
; CHECK-NEXT: .LBB45_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB45_12
; CHECK-NEXT: .LBB45_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB45_13
; CHECK-NEXT: .LBB45_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB45_14
; CHECK-NEXT: .LBB45_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB45_15
; CHECK-NEXT: .LBB45_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB45_16
; CHECK-NEXT: .LBB45_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB45_17
; CHECK-NEXT: .LBB45_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB45_18
; CHECK-NEXT: .LBB45_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB45_19
; CHECK-NEXT: .LBB45_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB45_20
; CHECK-NEXT: .LBB45_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB45_21
; CHECK-NEXT: .LBB45_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB45_22
; CHECK-NEXT: .LBB45_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB45_23
; CHECK-NEXT: .LBB45_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB45_24
; CHECK-NEXT: .LBB45_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB45_25
; CHECK-NEXT: .LBB45_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB45_26
; CHECK-NEXT: .LBB45_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB45_27
; CHECK-NEXT: .LBB45_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB45_28
; CHECK-NEXT: .LBB45_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB45_29
; CHECK-NEXT: .LBB45_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB45_30
; CHECK-NEXT: .LBB45_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB45_31
; CHECK-NEXT: .LBB45_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB45_32
; CHECK-NEXT: .LBB45_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB45_33
; CHECK-NEXT: .LBB45_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w9
; CHECK-NEXT: ldrb w9, [x0], #1
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB45_34
; CHECK-NEXT: b .LBB45_35
;
; CHECK-EXPAND-LABEL: masked_load_zext_v32i8i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1b { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1b { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i8>, ptr %bp
%mask = icmp eq <32 x i8> %b, zeroinitializer
%load = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %ap, <32 x i1> %mask, <32 x i8> poison)
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_v64i16i32(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v64i16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.h, vl64
; CHECK-NEXT: str x2, [sp] // 8-byte Spill
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w12, v0.b[1]
; CHECK-NEXT: fmov w25, s0
; CHECK-NEXT: mov z3.b, z0.b[18]
; CHECK-NEXT: mov z4.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[2]
; CHECK-NEXT: umov w14, v0.b[7]
; CHECK-NEXT: umov w3, v0.b[8]
; CHECK-NEXT: mov z5.b, z0.b[20]
; CHECK-NEXT: umov w4, v0.b[9]
; CHECK-NEXT: mov z6.b, z0.b[21]
; CHECK-NEXT: and x25, x25, #0x1
; CHECK-NEXT: umov w5, v0.b[10]
; CHECK-NEXT: mov z7.b, z0.b[22]
; CHECK-NEXT: fmov w19, s3
; CHECK-NEXT: fmov w20, s4
; CHECK-NEXT: bfi x25, x12, #1, #1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: mov z16.b, z0.b[23]
; CHECK-NEXT: fmov w21, s5
; CHECK-NEXT: umov w15, v0.b[11]
; CHECK-NEXT: fmov w22, s6
; CHECK-NEXT: bfi x25, x13, #2, #1
; CHECK-NEXT: ubfiz x13, x14, #7, #1
; CHECK-NEXT: ubfiz x14, x3, #8, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: umov w17, v0.b[12]
; CHECK-NEXT: mov z17.b, z0.b[24]
; CHECK-NEXT: fmov w23, s7
; CHECK-NEXT: ubfiz x3, x4, #9, #1
; CHECK-NEXT: ubfiz x4, x19, #18, #1
; CHECK-NEXT: ubfiz x19, x20, #19, #1
; CHECK-NEXT: umov w18, v0.b[13]
; CHECK-NEXT: mov z18.b, z0.b[25]
; CHECK-NEXT: fmov w24, s16
; CHECK-NEXT: orr x13, x13, x14
; CHECK-NEXT: ubfiz x14, x5, #10, #1
; CHECK-NEXT: ubfiz x5, x21, #20, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w16, v0.b[14]
; CHECK-NEXT: mov z19.b, z0.b[26]
; CHECK-NEXT: orr x4, x4, x19
; CHECK-NEXT: orr x13, x13, x3
; CHECK-NEXT: ubfiz x3, x22, #21, #1
; CHECK-NEXT: bfi x25, x11, #3, #1
; CHECK-NEXT: mov z20.b, z0.b[27]
; CHECK-NEXT: fmov w26, s17
; CHECK-NEXT: orr x11, x13, x14
; CHECK-NEXT: orr x13, x4, x5
; CHECK-NEXT: ubfiz x14, x15, #11, #1
; CHECK-NEXT: ubfiz x15, x23, #22, #1
; CHECK-NEXT: mov z1.b, z0.b[16]
; CHECK-NEXT: mov z21.b, z0.b[28]
; CHECK-NEXT: fmov w27, s18
; CHECK-NEXT: orr x13, x13, x3
; CHECK-NEXT: bfi x25, x10, #4, #1
; CHECK-NEXT: ubfiz x10, x17, #12, #1
; CHECK-NEXT: ubfiz x17, x24, #23, #1
; CHECK-NEXT: umov w1, v0.b[15]
; CHECK-NEXT: mov z2.b, z0.b[17]
; CHECK-NEXT: mov z4.b, z0.b[29]
; CHECK-NEXT: fmov w28, s19
; CHECK-NEXT: orr x11, x11, x14
; CHECK-NEXT: orr x13, x13, x15
; CHECK-NEXT: ubfiz x14, x18, #13, #1
; CHECK-NEXT: mov z5.b, z0.b[30]
; CHECK-NEXT: fmov w29, s20
; CHECK-NEXT: orr x10, x11, x10
; CHECK-NEXT: bfi x25, x9, #5, #1
; CHECK-NEXT: orr x9, x13, x17
; CHECK-NEXT: ubfiz x11, x16, #14, #1
; CHECK-NEXT: ubfiz x13, x26, #24, #1
; CHECK-NEXT: fmov w6, s1
; CHECK-NEXT: fmov w12, s21
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: ubfiz x15, x27, #25, #1
; CHECK-NEXT: umov w2, v0.b[6]
; CHECK-NEXT: fmov w7, s2
; CHECK-NEXT: fmov w30, s4
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x11, x28, #26, #1
; CHECK-NEXT: fmov w8, s5
; CHECK-NEXT: ubfiz x14, x1, #15, #1
; CHECK-NEXT: orr x9, x9, x15
; CHECK-NEXT: ubfiz x13, x29, #27, #1
; CHECK-NEXT: mov z3.b, z0.b[31]
; CHECK-NEXT: orr x9, x9, x11
; CHECK-NEXT: ubfiz x11, x6, #16, #1
; CHECK-NEXT: ubfiz x12, x12, #28, #1
; CHECK-NEXT: orr x10, x10, x14
; CHECK-NEXT: orr x9, x9, x13
; CHECK-NEXT: ubfiz x13, x7, #17, #1
; CHECK-NEXT: ubfiz x14, x30, #29, #1
; CHECK-NEXT: mov z2.b, z0.b[32]
; CHECK-NEXT: bfi x25, x2, #6, #1
; CHECK-NEXT: orr x10, x10, x11
; CHECK-NEXT: orr x9, x9, x12
; CHECK-NEXT: ubfiz x8, x8, #30, #1
; CHECK-NEXT: fmov w11, s3
; CHECK-NEXT: orr x10, x10, x13
; CHECK-NEXT: orr x9, x9, x14
; CHECK-NEXT: mov z1.b, z0.b[33]
; CHECK-NEXT: orr x10, x25, x10
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: lsl w9, w11, #31
; CHECK-NEXT: mov z2.b, z0.b[34]
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #32
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[35]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #33
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[36]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #34
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[37]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #35
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[38]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #36
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[39]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #37
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[40]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #38
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[41]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #39
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[42]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #40
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[43]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #41
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[44]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #42
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[45]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #43
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[46]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #44
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[47]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #45
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[48]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #46
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[49]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #47
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[50]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #48
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[51]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #49
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[52]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #50
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[53]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #51
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[54]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #52
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[55]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #53
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[56]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #54
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[57]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #55
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[58]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #56
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[59]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #57
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[60]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #58
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z1.b, z0.b[61]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: orr x8, x8, x9, lsl #59
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: mov z2.b, z0.b[62]
; CHECK-NEXT: mov z0.b, z0.b[63]
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #60
; CHECK-NEXT: and w9, w10, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #61
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: orr x8, x8, x9, lsl #62
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: orr x8, x8, x9, lsl #63
; CHECK-NEXT: tbz w8, #0, .LBB46_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #2
; CHECK-NEXT: tbnz w8, #1, .LBB46_3
; CHECK-NEXT: b .LBB46_4
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: adrp x9, .LCPI46_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI46_0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB46_4
; CHECK-NEXT: .LBB46_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: .LBB46_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB46_68
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB46_69
; CHECK-NEXT: .LBB46_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB46_70
; CHECK-NEXT: .LBB46_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB46_71
; CHECK-NEXT: .LBB46_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB46_72
; CHECK-NEXT: .LBB46_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB46_73
; CHECK-NEXT: .LBB46_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB46_74
; CHECK-NEXT: .LBB46_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB46_75
; CHECK-NEXT: .LBB46_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB46_76
; CHECK-NEXT: .LBB46_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB46_77
; CHECK-NEXT: .LBB46_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB46_78
; CHECK-NEXT: .LBB46_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB46_79
; CHECK-NEXT: .LBB46_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB46_80
; CHECK-NEXT: .LBB46_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB46_81
; CHECK-NEXT: .LBB46_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB46_82
; CHECK-NEXT: .LBB46_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB46_83
; CHECK-NEXT: .LBB46_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB46_84
; CHECK-NEXT: .LBB46_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB46_85
; CHECK-NEXT: .LBB46_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB46_86
; CHECK-NEXT: .LBB46_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB46_87
; CHECK-NEXT: .LBB46_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB46_88
; CHECK-NEXT: .LBB46_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB46_89
; CHECK-NEXT: .LBB46_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB46_90
; CHECK-NEXT: .LBB46_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB46_91
; CHECK-NEXT: .LBB46_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB46_92
; CHECK-NEXT: .LBB46_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB46_93
; CHECK-NEXT: .LBB46_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB46_94
; CHECK-NEXT: .LBB46_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB46_95
; CHECK-NEXT: .LBB46_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB46_96
; CHECK-NEXT: .LBB46_33: // %else118
; CHECK-NEXT: tbnz w8, #31, .LBB46_97
; CHECK-NEXT: .LBB46_34: // %else122
; CHECK-NEXT: tbnz x8, #32, .LBB46_98
; CHECK-NEXT: .LBB46_35: // %else126
; CHECK-NEXT: tbnz x8, #33, .LBB46_99
; CHECK-NEXT: .LBB46_36: // %else130
; CHECK-NEXT: tbnz x8, #34, .LBB46_100
; CHECK-NEXT: .LBB46_37: // %else134
; CHECK-NEXT: tbnz x8, #35, .LBB46_101
; CHECK-NEXT: .LBB46_38: // %else138
; CHECK-NEXT: tbnz x8, #36, .LBB46_102
; CHECK-NEXT: .LBB46_39: // %else142
; CHECK-NEXT: tbnz x8, #37, .LBB46_103
; CHECK-NEXT: .LBB46_40: // %else146
; CHECK-NEXT: tbnz x8, #38, .LBB46_104
; CHECK-NEXT: .LBB46_41: // %else150
; CHECK-NEXT: tbnz x8, #39, .LBB46_105
; CHECK-NEXT: .LBB46_42: // %else154
; CHECK-NEXT: tbnz x8, #40, .LBB46_106
; CHECK-NEXT: .LBB46_43: // %else158
; CHECK-NEXT: tbnz x8, #41, .LBB46_107
; CHECK-NEXT: .LBB46_44: // %else162
; CHECK-NEXT: tbnz x8, #42, .LBB46_108
; CHECK-NEXT: .LBB46_45: // %else166
; CHECK-NEXT: tbnz x8, #43, .LBB46_109
; CHECK-NEXT: .LBB46_46: // %else170
; CHECK-NEXT: tbnz x8, #44, .LBB46_110
; CHECK-NEXT: .LBB46_47: // %else174
; CHECK-NEXT: tbnz x8, #45, .LBB46_111
; CHECK-NEXT: .LBB46_48: // %else178
; CHECK-NEXT: tbnz x8, #46, .LBB46_112
; CHECK-NEXT: .LBB46_49: // %else182
; CHECK-NEXT: tbnz x8, #47, .LBB46_113
; CHECK-NEXT: .LBB46_50: // %else186
; CHECK-NEXT: tbnz x8, #48, .LBB46_114
; CHECK-NEXT: .LBB46_51: // %else190
; CHECK-NEXT: tbnz x8, #49, .LBB46_115
; CHECK-NEXT: .LBB46_52: // %else194
; CHECK-NEXT: tbnz x8, #50, .LBB46_116
; CHECK-NEXT: .LBB46_53: // %else198
; CHECK-NEXT: tbnz x8, #51, .LBB46_117
; CHECK-NEXT: .LBB46_54: // %else202
; CHECK-NEXT: tbnz x8, #52, .LBB46_118
; CHECK-NEXT: .LBB46_55: // %else206
; CHECK-NEXT: tbnz x8, #53, .LBB46_119
; CHECK-NEXT: .LBB46_56: // %else210
; CHECK-NEXT: tbnz x8, #54, .LBB46_120
; CHECK-NEXT: .LBB46_57: // %else214
; CHECK-NEXT: tbnz x8, #55, .LBB46_121
; CHECK-NEXT: .LBB46_58: // %else218
; CHECK-NEXT: tbnz x8, #56, .LBB46_122
; CHECK-NEXT: .LBB46_59: // %else222
; CHECK-NEXT: tbnz x8, #57, .LBB46_123
; CHECK-NEXT: .LBB46_60: // %else226
; CHECK-NEXT: tbnz x8, #58, .LBB46_124
; CHECK-NEXT: .LBB46_61: // %else230
; CHECK-NEXT: tbnz x8, #59, .LBB46_125
; CHECK-NEXT: .LBB46_62: // %else234
; CHECK-NEXT: tbnz x8, #60, .LBB46_126
; CHECK-NEXT: .LBB46_63: // %else238
; CHECK-NEXT: tbnz x8, #61, .LBB46_127
; CHECK-NEXT: .LBB46_64: // %else242
; CHECK-NEXT: tbnz x8, #62, .LBB46_128
; CHECK-NEXT: .LBB46_65: // %else246
; CHECK-NEXT: tbz x8, #63, .LBB46_67
; CHECK-NEXT: .LBB46_66: // %cond.load249
; CHECK-NEXT: mov w8, #63 // =0x3f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: .LBB46_67: // %else250
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB46_68: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB46_6
; CHECK-NEXT: .LBB46_69: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB46_7
; CHECK-NEXT: .LBB46_70: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB46_8
; CHECK-NEXT: .LBB46_71: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB46_9
; CHECK-NEXT: .LBB46_72: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB46_10
; CHECK-NEXT: .LBB46_73: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB46_11
; CHECK-NEXT: .LBB46_74: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB46_12
; CHECK-NEXT: .LBB46_75: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB46_13
; CHECK-NEXT: .LBB46_76: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB46_14
; CHECK-NEXT: .LBB46_77: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB46_15
; CHECK-NEXT: .LBB46_78: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB46_16
; CHECK-NEXT: .LBB46_79: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB46_17
; CHECK-NEXT: .LBB46_80: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB46_18
; CHECK-NEXT: .LBB46_81: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB46_19
; CHECK-NEXT: .LBB46_82: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB46_20
; CHECK-NEXT: .LBB46_83: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB46_21
; CHECK-NEXT: .LBB46_84: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB46_22
; CHECK-NEXT: .LBB46_85: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB46_23
; CHECK-NEXT: .LBB46_86: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB46_24
; CHECK-NEXT: .LBB46_87: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB46_25
; CHECK-NEXT: .LBB46_88: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB46_26
; CHECK-NEXT: .LBB46_89: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB46_27
; CHECK-NEXT: .LBB46_90: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB46_28
; CHECK-NEXT: .LBB46_91: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB46_29
; CHECK-NEXT: .LBB46_92: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB46_30
; CHECK-NEXT: .LBB46_93: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB46_31
; CHECK-NEXT: .LBB46_94: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB46_32
; CHECK-NEXT: .LBB46_95: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB46_33
; CHECK-NEXT: .LBB46_96: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #31, .LBB46_34
; CHECK-NEXT: .LBB46_97: // %cond.load121
; CHECK-NEXT: mov w9, #31 // =0x1f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #32, .LBB46_35
; CHECK-NEXT: .LBB46_98: // %cond.load125
; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #33, .LBB46_36
; CHECK-NEXT: .LBB46_99: // %cond.load129
; CHECK-NEXT: mov w9, #33 // =0x21
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #34, .LBB46_37
; CHECK-NEXT: .LBB46_100: // %cond.load133
; CHECK-NEXT: mov w9, #34 // =0x22
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #35, .LBB46_38
; CHECK-NEXT: .LBB46_101: // %cond.load137
; CHECK-NEXT: mov w9, #35 // =0x23
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #36, .LBB46_39
; CHECK-NEXT: .LBB46_102: // %cond.load141
; CHECK-NEXT: mov w9, #36 // =0x24
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #37, .LBB46_40
; CHECK-NEXT: .LBB46_103: // %cond.load145
; CHECK-NEXT: mov w9, #37 // =0x25
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #38, .LBB46_41
; CHECK-NEXT: .LBB46_104: // %cond.load149
; CHECK-NEXT: mov w9, #38 // =0x26
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #39, .LBB46_42
; CHECK-NEXT: .LBB46_105: // %cond.load153
; CHECK-NEXT: mov w9, #39 // =0x27
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #40, .LBB46_43
; CHECK-NEXT: .LBB46_106: // %cond.load157
; CHECK-NEXT: mov w9, #40 // =0x28
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #41, .LBB46_44
; CHECK-NEXT: .LBB46_107: // %cond.load161
; CHECK-NEXT: mov w9, #41 // =0x29
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #42, .LBB46_45
; CHECK-NEXT: .LBB46_108: // %cond.load165
; CHECK-NEXT: mov w9, #42 // =0x2a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #43, .LBB46_46
; CHECK-NEXT: .LBB46_109: // %cond.load169
; CHECK-NEXT: mov w9, #43 // =0x2b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #44, .LBB46_47
; CHECK-NEXT: .LBB46_110: // %cond.load173
; CHECK-NEXT: mov w9, #44 // =0x2c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #45, .LBB46_48
; CHECK-NEXT: .LBB46_111: // %cond.load177
; CHECK-NEXT: mov w9, #45 // =0x2d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #46, .LBB46_49
; CHECK-NEXT: .LBB46_112: // %cond.load181
; CHECK-NEXT: mov w9, #46 // =0x2e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #47, .LBB46_50
; CHECK-NEXT: .LBB46_113: // %cond.load185
; CHECK-NEXT: mov w9, #47 // =0x2f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #48, .LBB46_51
; CHECK-NEXT: .LBB46_114: // %cond.load189
; CHECK-NEXT: mov w9, #48 // =0x30
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #49, .LBB46_52
; CHECK-NEXT: .LBB46_115: // %cond.load193
; CHECK-NEXT: mov w9, #49 // =0x31
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #50, .LBB46_53
; CHECK-NEXT: .LBB46_116: // %cond.load197
; CHECK-NEXT: mov w9, #50 // =0x32
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #51, .LBB46_54
; CHECK-NEXT: .LBB46_117: // %cond.load201
; CHECK-NEXT: mov w9, #51 // =0x33
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #52, .LBB46_55
; CHECK-NEXT: .LBB46_118: // %cond.load205
; CHECK-NEXT: mov w9, #52 // =0x34
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #53, .LBB46_56
; CHECK-NEXT: .LBB46_119: // %cond.load209
; CHECK-NEXT: mov w9, #53 // =0x35
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #54, .LBB46_57
; CHECK-NEXT: .LBB46_120: // %cond.load213
; CHECK-NEXT: mov w9, #54 // =0x36
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #55, .LBB46_58
; CHECK-NEXT: .LBB46_121: // %cond.load217
; CHECK-NEXT: mov w9, #55 // =0x37
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #56, .LBB46_59
; CHECK-NEXT: .LBB46_122: // %cond.load221
; CHECK-NEXT: mov w9, #56 // =0x38
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #57, .LBB46_60
; CHECK-NEXT: .LBB46_123: // %cond.load225
; CHECK-NEXT: mov w9, #57 // =0x39
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #58, .LBB46_61
; CHECK-NEXT: .LBB46_124: // %cond.load229
; CHECK-NEXT: mov w9, #58 // =0x3a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #59, .LBB46_62
; CHECK-NEXT: .LBB46_125: // %cond.load233
; CHECK-NEXT: mov w9, #59 // =0x3b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #60, .LBB46_63
; CHECK-NEXT: .LBB46_126: // %cond.load237
; CHECK-NEXT: mov w9, #60 // =0x3c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #61, .LBB46_64
; CHECK-NEXT: .LBB46_127: // %cond.load241
; CHECK-NEXT: mov w9, #61 // =0x3d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz x8, #62, .LBB46_65
; CHECK-NEXT: .LBB46_128: // %cond.load245
; CHECK-NEXT: mov w9, #62 // =0x3e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbnz x8, #63, .LBB46_66
; CHECK-NEXT: b .LBB46_67
;
; CHECK-EXPAND-LABEL: masked_load_zext_v64i16i32:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl64
; CHECK-EXPAND-NEXT: ld1h { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p2.s, xzr, x8
; CHECK-EXPAND-NEXT: ld1h { z0.s }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <64 x i16>, ptr %bp
%mask = icmp eq <64 x i16> %b, zeroinitializer
%load = call <64 x i16> @llvm.masked.expandload.v64i16(ptr %ap, <64 x i1> %mask, <64 x i16> poison)
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, ptr %c
ret void
}
; Expanding load of <32 x i16> whose result is zero-extended to <32 x i64>.
; The mask is formed by comparing the <32 x i16> loaded from %bp against zero.
; @llvm.masked.expandload.v32i16 then reads from %ap with a poison passthru,
; and the zero-extended vector is stored to %c.
; vscale_range(16,0) requests a minimum vscale of 16 with no upper bound.
; The plain CHECK lines (RUN lines without sve2p2) expect the 32 lane mask bits
; to be gathered into w8 and tested one at a time with tbz/tbnz, each set bit
; taking a cond.load block that inserts one halfword and post-increments x0.
; The CHECK-EXPAND lines (RUN line with -mattr=sve2p2) expect a single
; cntp / whilelo / ld1h / expand sequence instead.
; NOTE(review) - the name says "masked_load" although the body calls the
; expandload intrinsic; confirm the naming is intentional.
; The assertions are autogenerated (see the note at the top of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
define void @masked_load_zext_v32i16i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.h, vl32
; CHECK-NEXT: str x2, [sp] // 8-byte Spill
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: umov w12, v0.b[1]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w3, v0.b[7]
; CHECK-NEXT: umov w5, v0.b[8]
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[2]
; CHECK-NEXT: umov w4, v0.b[9]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: umov w1, v0.b[10]
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: bfi w6, w12, #1, #1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w16, v0.b[11]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: ubfiz w12, w3, #7, #1
; CHECK-NEXT: ubfiz w3, w5, #8, #1
; CHECK-NEXT: umov w17, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: bfi w6, w13, #2, #1
; CHECK-NEXT: ubfiz w13, w4, #9, #1
; CHECK-NEXT: umov w18, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: ubfiz w5, w20, #18, #1
; CHECK-NEXT: ubfiz w20, w21, #19, #1
; CHECK-NEXT: orr w12, w12, w3
; CHECK-NEXT: ubfiz w1, w1, #10, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: ubfiz w4, w22, #20, #1
; CHECK-NEXT: orr w12, w12, w13
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: orr w3, w5, w20
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: orr w11, w12, w1
; CHECK-NEXT: ubfiz w12, w16, #11, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w13, w3, w4
; CHECK-NEXT: ubfiz w3, w23, #21, #1
; CHECK-NEXT: ubfiz w16, w17, #12, #1
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: ubfiz w17, w24, #22, #1
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: ubfiz w12, w18, #13, #1
; CHECK-NEXT: fmov w28, s21
; CHECK-NEXT: orr w13, w13, w3
; CHECK-NEXT: ubfiz w18, w25, #23, #1
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: orr w10, w11, w16
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w29, s22
; CHECK-NEXT: orr w11, w13, w17
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w26, #24, #1
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: orr w11, w11, w18
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: ubfiz w9, w14, #14, #1
; CHECK-NEXT: ubfiz w13, w27, #25, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: orr w11, w11, w12
; CHECK-NEXT: ubfiz w14, w28, #26, #1
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w30, s23
; CHECK-NEXT: orr w9, w10, w9
; CHECK-NEXT: orr w10, w11, w13
; CHECK-NEXT: ubfiz w11, w29, #27, #1
; CHECK-NEXT: umov w2, v0.b[6]
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: fmov w8, s24
; CHECK-NEXT: ubfiz w12, w15, #15, #1
; CHECK-NEXT: orr w10, w10, w14
; CHECK-NEXT: ubfiz w14, w30, #28, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: fmov w11, s2
; CHECK-NEXT: orr w9, w9, w12
; CHECK-NEXT: ubfiz w12, w7, #16, #1
; CHECK-NEXT: ubfiz w13, w19, #17, #1
; CHECK-NEXT: ubfiz w8, w8, #29, #1
; CHECK-NEXT: bfi w6, w2, #6, #1
; CHECK-NEXT: orr w10, w10, w14
; CHECK-NEXT: orr w9, w9, w12
; CHECK-NEXT: ubfiz w11, w11, #30, #1
; CHECK-NEXT: orr w8, w10, w8
; CHECK-NEXT: orr w9, w9, w13
; CHECK-NEXT: orr w9, w6, w9
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB47_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #2
; CHECK-NEXT: tbnz w8, #1, .LBB47_3
; CHECK-NEXT: b .LBB47_4
; CHECK-NEXT: .LBB47_2:
; CHECK-NEXT: adrp x9, .LCPI47_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI47_0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB47_4
; CHECK-NEXT: .LBB47_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: .LBB47_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB47_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB47_37
; CHECK-NEXT: .LBB47_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB47_38
; CHECK-NEXT: .LBB47_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB47_39
; CHECK-NEXT: .LBB47_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB47_40
; CHECK-NEXT: .LBB47_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB47_41
; CHECK-NEXT: .LBB47_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB47_42
; CHECK-NEXT: .LBB47_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB47_43
; CHECK-NEXT: .LBB47_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB47_44
; CHECK-NEXT: .LBB47_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB47_45
; CHECK-NEXT: .LBB47_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB47_46
; CHECK-NEXT: .LBB47_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB47_47
; CHECK-NEXT: .LBB47_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB47_48
; CHECK-NEXT: .LBB47_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB47_49
; CHECK-NEXT: .LBB47_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB47_50
; CHECK-NEXT: .LBB47_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB47_51
; CHECK-NEXT: .LBB47_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB47_52
; CHECK-NEXT: .LBB47_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB47_53
; CHECK-NEXT: .LBB47_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB47_54
; CHECK-NEXT: .LBB47_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB47_55
; CHECK-NEXT: .LBB47_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB47_56
; CHECK-NEXT: .LBB47_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB47_57
; CHECK-NEXT: .LBB47_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB47_58
; CHECK-NEXT: .LBB47_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB47_59
; CHECK-NEXT: .LBB47_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB47_60
; CHECK-NEXT: .LBB47_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB47_61
; CHECK-NEXT: .LBB47_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB47_62
; CHECK-NEXT: .LBB47_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB47_63
; CHECK-NEXT: .LBB47_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB47_64
; CHECK-NEXT: .LBB47_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB47_35
; CHECK-NEXT: .LBB47_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w8
; CHECK-NEXT: .LBB47_35: // %else122
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldr x8, [sp] // 8-byte Reload
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB47_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB47_6
; CHECK-NEXT: .LBB47_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB47_7
; CHECK-NEXT: .LBB47_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB47_8
; CHECK-NEXT: .LBB47_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB47_9
; CHECK-NEXT: .LBB47_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB47_10
; CHECK-NEXT: .LBB47_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB47_11
; CHECK-NEXT: .LBB47_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB47_12
; CHECK-NEXT: .LBB47_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB47_13
; CHECK-NEXT: .LBB47_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB47_14
; CHECK-NEXT: .LBB47_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB47_15
; CHECK-NEXT: .LBB47_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB47_16
; CHECK-NEXT: .LBB47_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB47_17
; CHECK-NEXT: .LBB47_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB47_18
; CHECK-NEXT: .LBB47_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB47_19
; CHECK-NEXT: .LBB47_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB47_20
; CHECK-NEXT: .LBB47_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB47_21
; CHECK-NEXT: .LBB47_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB47_22
; CHECK-NEXT: .LBB47_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB47_23
; CHECK-NEXT: .LBB47_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB47_24
; CHECK-NEXT: .LBB47_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB47_25
; CHECK-NEXT: .LBB47_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB47_26
; CHECK-NEXT: .LBB47_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB47_27
; CHECK-NEXT: .LBB47_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB47_28
; CHECK-NEXT: .LBB47_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB47_29
; CHECK-NEXT: .LBB47_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB47_30
; CHECK-NEXT: .LBB47_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB47_31
; CHECK-NEXT: .LBB47_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB47_32
; CHECK-NEXT: .LBB47_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB47_33
; CHECK-NEXT: .LBB47_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w9
; CHECK-NEXT: ldrh w9, [x0], #2
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB47_34
; CHECK-NEXT: b .LBB47_35
;
; CHECK-EXPAND-LABEL: masked_load_zext_v32i16i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1h { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1h { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %b, zeroinitializer
%load = call <32 x i16> @llvm.masked.expandload.v32i16(ptr %ap, <32 x i1> %mask, <32 x i16> poison)
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
; Expanding load of <32 x i32> from %ap, zero-extended to <32 x i64> and stored
; to %c. A lane is enabled when the matching i32 lane loaded from %bp is zero.
; vscale_range(16,0) requires vscale >= 16 with no upper bound, i.e. SVE
; registers of at least 2048 bits.
; The first group of assertions below packs the mask into w8, then tests one
; mask bit per lane (tbz/tbnz) and loads each enabled lane with a scalar ldr.
; The group for the -mattr=sve2p2 run instead counts the enabled lanes (cntp),
; loads that many contiguous elements (whilelo + ld1w) and applies one "expand".
define void @masked_load_zext_v32i32i64(ptr %ap, ptr %bp, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_load_zext_v32i32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w26, -64
; CHECK-NEXT: .cfi_offset w27, -72
; CHECK-NEXT: .cfi_offset w28, -80
; CHECK-NEXT: .cfi_offset w30, -88
; CHECK-NEXT: .cfi_offset w29, -96
; CHECK-NEXT: ptrue p1.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: mov z5.b, z0.b[18]
; CHECK-NEXT: mov z6.b, z0.b[19]
; CHECK-NEXT: umov w13, v0.b[1]
; CHECK-NEXT: umov w3, v0.b[7]
; CHECK-NEXT: umov w4, v0.b[8]
; CHECK-NEXT: mov z7.b, z0.b[20]
; CHECK-NEXT: fmov w6, s0
; CHECK-NEXT: umov w5, v0.b[9]
; CHECK-NEXT: umov w12, v0.b[2]
; CHECK-NEXT: mov z16.b, z0.b[21]
; CHECK-NEXT: fmov w20, s5
; CHECK-NEXT: fmov w21, s6
; CHECK-NEXT: umov w18, v0.b[10]
; CHECK-NEXT: mov z17.b, z0.b[22]
; CHECK-NEXT: fmov w22, s7
; CHECK-NEXT: and w6, w6, #0x1
; CHECK-NEXT: umov w11, v0.b[3]
; CHECK-NEXT: umov w1, v0.b[11]
; CHECK-NEXT: bfi w6, w13, #1, #1
; CHECK-NEXT: ubfiz w13, w3, #7, #1
; CHECK-NEXT: ubfiz w3, w4, #8, #1
; CHECK-NEXT: fmov w23, s16
; CHECK-NEXT: ubfiz w4, w5, #9, #1
; CHECK-NEXT: ubfiz w5, w20, #18, #1
; CHECK-NEXT: ubfiz w20, w21, #19, #1
; CHECK-NEXT: umov w16, v0.b[12]
; CHECK-NEXT: mov z18.b, z0.b[23]
; CHECK-NEXT: fmov w24, s17
; CHECK-NEXT: bfi w6, w12, #2, #1
; CHECK-NEXT: orr w12, w13, w3
; CHECK-NEXT: ubfiz w13, w22, #20, #1
; CHECK-NEXT: umov w17, v0.b[13]
; CHECK-NEXT: mov z19.b, z0.b[24]
; CHECK-NEXT: orr w3, w5, w20
; CHECK-NEXT: ubfiz w18, w18, #10, #1
; CHECK-NEXT: umov w10, v0.b[4]
; CHECK-NEXT: mov z20.b, z0.b[25]
; CHECK-NEXT: orr w12, w12, w4
; CHECK-NEXT: orr w13, w3, w13
; CHECK-NEXT: ubfiz w3, w23, #21, #1
; CHECK-NEXT: umov w14, v0.b[14]
; CHECK-NEXT: mov z21.b, z0.b[26]
; CHECK-NEXT: fmov w25, s18
; CHECK-NEXT: ubfiz w1, w1, #11, #1
; CHECK-NEXT: bfi w6, w11, #3, #1
; CHECK-NEXT: orr w11, w12, w18
; CHECK-NEXT: ubfiz w12, w24, #22, #1
; CHECK-NEXT: umov w9, v0.b[5]
; CHECK-NEXT: mov z22.b, z0.b[27]
; CHECK-NEXT: fmov w26, s19
; CHECK-NEXT: orr w13, w13, w3
; CHECK-NEXT: ubfiz w16, w16, #12, #1
; CHECK-NEXT: fmov w27, s20
; CHECK-NEXT: orr w11, w11, w1
; CHECK-NEXT: orr w12, w13, w12
; CHECK-NEXT: ubfiz w13, w17, #13, #1
; CHECK-NEXT: umov w8, v0.b[6]
; CHECK-NEXT: mov z24.b, z0.b[29]
; CHECK-NEXT: fmov w28, s21
; CHECK-NEXT: ubfiz w17, w25, #23, #1
; CHECK-NEXT: bfi w6, w10, #4, #1
; CHECK-NEXT: orr w10, w11, w16
; CHECK-NEXT: mov z3.b, z0.b[16]
; CHECK-NEXT: mov z23.b, z0.b[28]
; CHECK-NEXT: fmov w29, s22
; CHECK-NEXT: ubfiz w11, w26, #24, #1
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: ubfiz w13, w14, #14, #1
; CHECK-NEXT: umov w15, v0.b[15]
; CHECK-NEXT: mov z4.b, z0.b[17]
; CHECK-NEXT: orr w12, w12, w17
; CHECK-NEXT: ubfiz w14, w27, #25, #1
; CHECK-NEXT: bfi w6, w9, #5, #1
; CHECK-NEXT: mov z2.b, z0.b[30]
; CHECK-NEXT: orr w11, w12, w11
; CHECK-NEXT: ubfiz w9, w28, #26, #1
; CHECK-NEXT: orr w10, w10, w13
; CHECK-NEXT: fmov w13, s24
; CHECK-NEXT: fmov w7, s3
; CHECK-NEXT: fmov w30, s23
; CHECK-NEXT: orr w11, w11, w14
; CHECK-NEXT: bfi w6, w8, #6, #1
; CHECK-NEXT: ubfiz w8, w29, #27, #1
; CHECK-NEXT: fmov w19, s4
; CHECK-NEXT: orr w9, w11, w9
; CHECK-NEXT: ubfiz w12, w15, #15, #1
; CHECK-NEXT: mov z1.b, z0.b[31]
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: ubfiz w9, w13, #29, #1
; CHECK-NEXT: fmov w13, s2
; CHECK-NEXT: ubfiz w11, w7, #16, #1
; CHECK-NEXT: ubfiz w14, w30, #28, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: ubfiz w12, w19, #17, #1
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: orr w8, w8, w14
; CHECK-NEXT: ubfiz w11, w13, #30, #1
; CHECK-NEXT: orr w10, w10, w12
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: orr w9, w6, w10
; CHECK-NEXT: orr w8, w8, w11
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: orr w8, w8, w9, lsl #31
; CHECK-NEXT: tbz w8, #0, .LBB48_2
; CHECK-NEXT: // %bb.1: // %cond.load
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT: add x0, x0, #4
; CHECK-NEXT: tbnz w8, #1, .LBB48_3
; CHECK-NEXT: b .LBB48_4
; CHECK-NEXT: .LBB48_2:
; CHECK-NEXT: adrp x9, .LCPI48_0
; CHECK-NEXT: add x9, x9, :lo12:.LCPI48_0
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9]
; CHECK-NEXT: tbz w8, #1, .LBB48_4
; CHECK-NEXT: .LBB48_3: // %cond.load1
; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: .LBB48_4: // %else2
; CHECK-NEXT: tbnz w8, #2, .LBB48_36
; CHECK-NEXT: // %bb.5: // %else6
; CHECK-NEXT: tbnz w8, #3, .LBB48_37
; CHECK-NEXT: .LBB48_6: // %else10
; CHECK-NEXT: tbnz w8, #4, .LBB48_38
; CHECK-NEXT: .LBB48_7: // %else14
; CHECK-NEXT: tbnz w8, #5, .LBB48_39
; CHECK-NEXT: .LBB48_8: // %else18
; CHECK-NEXT: tbnz w8, #6, .LBB48_40
; CHECK-NEXT: .LBB48_9: // %else22
; CHECK-NEXT: tbnz w8, #7, .LBB48_41
; CHECK-NEXT: .LBB48_10: // %else26
; CHECK-NEXT: tbnz w8, #8, .LBB48_42
; CHECK-NEXT: .LBB48_11: // %else30
; CHECK-NEXT: tbnz w8, #9, .LBB48_43
; CHECK-NEXT: .LBB48_12: // %else34
; CHECK-NEXT: tbnz w8, #10, .LBB48_44
; CHECK-NEXT: .LBB48_13: // %else38
; CHECK-NEXT: tbnz w8, #11, .LBB48_45
; CHECK-NEXT: .LBB48_14: // %else42
; CHECK-NEXT: tbnz w8, #12, .LBB48_46
; CHECK-NEXT: .LBB48_15: // %else46
; CHECK-NEXT: tbnz w8, #13, .LBB48_47
; CHECK-NEXT: .LBB48_16: // %else50
; CHECK-NEXT: tbnz w8, #14, .LBB48_48
; CHECK-NEXT: .LBB48_17: // %else54
; CHECK-NEXT: tbnz w8, #15, .LBB48_49
; CHECK-NEXT: .LBB48_18: // %else58
; CHECK-NEXT: tbnz w8, #16, .LBB48_50
; CHECK-NEXT: .LBB48_19: // %else62
; CHECK-NEXT: tbnz w8, #17, .LBB48_51
; CHECK-NEXT: .LBB48_20: // %else66
; CHECK-NEXT: tbnz w8, #18, .LBB48_52
; CHECK-NEXT: .LBB48_21: // %else70
; CHECK-NEXT: tbnz w8, #19, .LBB48_53
; CHECK-NEXT: .LBB48_22: // %else74
; CHECK-NEXT: tbnz w8, #20, .LBB48_54
; CHECK-NEXT: .LBB48_23: // %else78
; CHECK-NEXT: tbnz w8, #21, .LBB48_55
; CHECK-NEXT: .LBB48_24: // %else82
; CHECK-NEXT: tbnz w8, #22, .LBB48_56
; CHECK-NEXT: .LBB48_25: // %else86
; CHECK-NEXT: tbnz w8, #23, .LBB48_57
; CHECK-NEXT: .LBB48_26: // %else90
; CHECK-NEXT: tbnz w8, #24, .LBB48_58
; CHECK-NEXT: .LBB48_27: // %else94
; CHECK-NEXT: tbnz w8, #25, .LBB48_59
; CHECK-NEXT: .LBB48_28: // %else98
; CHECK-NEXT: tbnz w8, #26, .LBB48_60
; CHECK-NEXT: .LBB48_29: // %else102
; CHECK-NEXT: tbnz w8, #27, .LBB48_61
; CHECK-NEXT: .LBB48_30: // %else106
; CHECK-NEXT: tbnz w8, #28, .LBB48_62
; CHECK-NEXT: .LBB48_31: // %else110
; CHECK-NEXT: tbnz w8, #29, .LBB48_63
; CHECK-NEXT: .LBB48_32: // %else114
; CHECK-NEXT: tbnz w8, #30, .LBB48_64
; CHECK-NEXT: .LBB48_33: // %else118
; CHECK-NEXT: tbz w8, #31, .LBB48_35
; CHECK-NEXT: .LBB48_34: // %cond.load121
; CHECK-NEXT: mov w8, #31 // =0x1f
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w8
; CHECK-NEXT: .LBB48_35: // %else122
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB48_36: // %cond.load5
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #3, .LBB48_6
; CHECK-NEXT: .LBB48_37: // %cond.load9
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #4, .LBB48_7
; CHECK-NEXT: .LBB48_38: // %cond.load13
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #5, .LBB48_8
; CHECK-NEXT: .LBB48_39: // %cond.load17
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #6, .LBB48_9
; CHECK-NEXT: .LBB48_40: // %cond.load21
; CHECK-NEXT: mov w9, #6 // =0x6
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #7, .LBB48_10
; CHECK-NEXT: .LBB48_41: // %cond.load25
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #8, .LBB48_11
; CHECK-NEXT: .LBB48_42: // %cond.load29
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #9, .LBB48_12
; CHECK-NEXT: .LBB48_43: // %cond.load33
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #10, .LBB48_13
; CHECK-NEXT: .LBB48_44: // %cond.load37
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #11, .LBB48_14
; CHECK-NEXT: .LBB48_45: // %cond.load41
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #12, .LBB48_15
; CHECK-NEXT: .LBB48_46: // %cond.load45
; CHECK-NEXT: mov w9, #12 // =0xc
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #13, .LBB48_16
; CHECK-NEXT: .LBB48_47: // %cond.load49
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #14, .LBB48_17
; CHECK-NEXT: .LBB48_48: // %cond.load53
; CHECK-NEXT: mov w9, #14 // =0xe
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #15, .LBB48_18
; CHECK-NEXT: .LBB48_49: // %cond.load57
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #16, .LBB48_19
; CHECK-NEXT: .LBB48_50: // %cond.load61
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #17, .LBB48_20
; CHECK-NEXT: .LBB48_51: // %cond.load65
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #18, .LBB48_21
; CHECK-NEXT: .LBB48_52: // %cond.load69
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #19, .LBB48_22
; CHECK-NEXT: .LBB48_53: // %cond.load73
; CHECK-NEXT: mov w9, #19 // =0x13
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #20, .LBB48_23
; CHECK-NEXT: .LBB48_54: // %cond.load77
; CHECK-NEXT: mov w9, #20 // =0x14
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #21, .LBB48_24
; CHECK-NEXT: .LBB48_55: // %cond.load81
; CHECK-NEXT: mov w9, #21 // =0x15
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #22, .LBB48_25
; CHECK-NEXT: .LBB48_56: // %cond.load85
; CHECK-NEXT: mov w9, #22 // =0x16
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #23, .LBB48_26
; CHECK-NEXT: .LBB48_57: // %cond.load89
; CHECK-NEXT: mov w9, #23 // =0x17
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #24, .LBB48_27
; CHECK-NEXT: .LBB48_58: // %cond.load93
; CHECK-NEXT: mov w9, #24 // =0x18
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #25, .LBB48_28
; CHECK-NEXT: .LBB48_59: // %cond.load97
; CHECK-NEXT: mov w9, #25 // =0x19
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #26, .LBB48_29
; CHECK-NEXT: .LBB48_60: // %cond.load101
; CHECK-NEXT: mov w9, #26 // =0x1a
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #27, .LBB48_30
; CHECK-NEXT: .LBB48_61: // %cond.load105
; CHECK-NEXT: mov w9, #27 // =0x1b
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #28, .LBB48_31
; CHECK-NEXT: .LBB48_62: // %cond.load109
; CHECK-NEXT: mov w9, #28 // =0x1c
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #29, .LBB48_32
; CHECK-NEXT: .LBB48_63: // %cond.load113
; CHECK-NEXT: mov w9, #29 // =0x1d
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbz w8, #30, .LBB48_33
; CHECK-NEXT: .LBB48_64: // %cond.load117
; CHECK-NEXT: mov w9, #30 // =0x1e
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w9
; CHECK-NEXT: ldr w9, [x0], #4
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p1/m, w9
; CHECK-NEXT: tbnz w8, #31, .LBB48_34
; CHECK-NEXT: b .LBB48_35
;
; CHECK-EXPAND-LABEL: masked_load_zext_v32i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.d, vl32
; CHECK-EXPAND-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpeq p1.d, p0/z, z0.d, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.d
; CHECK-EXPAND-NEXT: whilelo p2.d, xzr, x8
; CHECK-EXPAND-NEXT: ld1w { z0.d }, p2/z, [x0]
; CHECK-EXPAND-NEXT: expand z0.d, p1, z0.d
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: ret
; Build the lane mask: a lane is enabled when its element of %bp is zero.
%b = load <32 x i32>, ptr %bp
%mask = icmp eq <32 x i32> %b, zeroinitializer
; Consecutive i32 values read from %ap fill the enabled lanes in order; the
; disabled lanes take the poison passthru operand.
%load = call <32 x i32> @llvm.masked.expandload.v32i32(ptr %ap, <32 x i1> %mask, <32 x i32> poison)
; Widen each lane to 64 bits without sign and write the whole vector to %c.
%ext = zext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, ptr %c
ret void
}
; Expanding load of <8 x i32> from %ap, sign-extended to <8 x i64> and stored
; to %c. A lane is enabled when the matching i32 lane loaded from %bp is
; unsigned-greater-than zero; every assertion group below shows that compare
; emitted as "cmpne ..., #0".
; Without the sve2p2 feature the mask is packed into w8 and each enabled lane
; is loaded by its own tbz/tbnz-guarded scalar ldr. With -mattr=sve2p2 the
; sequence is cntp + whilelo + ld1w + expand.
; When only 256 bits are guaranteed the <8 x i64> result is stored as two vl4
; halves (ext + two sunpklo + two st1d); with 512 bits or more a single vl8
; st1d is used.
define void @masked_load_sext_ugt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_sext_ugt_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpne p0.s, p1/z, z0.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB49_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB49_3
; VBITS_GE_256-NEXT: b .LBB49_4
; VBITS_GE_256-NEXT: .LBB49_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI49_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI49_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB49_4
; VBITS_GE_256-NEXT: .LBB49_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: .LBB49_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB49_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB49_13
; VBITS_GE_256-NEXT: .LBB49_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB49_14
; VBITS_GE_256-NEXT: .LBB49_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB49_15
; VBITS_GE_256-NEXT: .LBB49_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB49_16
; VBITS_GE_256-NEXT: .LBB49_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB49_11
; VBITS_GE_256-NEXT: .LBB49_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_256-NEXT: .LBB49_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB49_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB49_6
; VBITS_GE_256-NEXT: .LBB49_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB49_7
; VBITS_GE_256-NEXT: .LBB49_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB49_8
; VBITS_GE_256-NEXT: .LBB49_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB49_9
; VBITS_GE_256-NEXT: .LBB49_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB49_10
; VBITS_GE_256-NEXT: b .LBB49_11
;
; VBITS_GE_512-LABEL: masked_load_sext_ugt_v8i32i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpne p0.s, p1/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB49_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB49_3
; VBITS_GE_512-NEXT: b .LBB49_4
; VBITS_GE_512-NEXT: .LBB49_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI49_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI49_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB49_4
; VBITS_GE_512-NEXT: .LBB49_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: .LBB49_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB49_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB49_13
; VBITS_GE_512-NEXT: .LBB49_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB49_14
; VBITS_GE_512-NEXT: .LBB49_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB49_15
; VBITS_GE_512-NEXT: .LBB49_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB49_16
; VBITS_GE_512-NEXT: .LBB49_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB49_11
; VBITS_GE_512-NEXT: .LBB49_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_512-NEXT: .LBB49_11: // %else26
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB49_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB49_6
; VBITS_GE_512-NEXT: .LBB49_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB49_7
; VBITS_GE_512-NEXT: .LBB49_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB49_8
; VBITS_GE_512-NEXT: .LBB49_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB49_9
; VBITS_GE_512-NEXT: .LBB49_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB49_10
; VBITS_GE_512-NEXT: b .LBB49_11
;
; CHECK-EXPAND-LABEL: masked_load_sext_ugt_v8i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: sunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: sunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
; Build the lane mask: a lane is enabled when its element of %bp is unsigned
; greater than zero, i.e. non-zero.
%b = load <8 x i32>, ptr %bp
%mask = icmp ugt <8 x i32> %b, zeroinitializer
; Consecutive i32 values read from %ap fill the enabled lanes in order; the
; disabled lanes take the poison passthru operand.
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
; Widen each lane to 64 bits with sign extension and write the vector to %c.
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
define void @masked_load_zext_sgt_v8i32i64(ptr %ap, ptr %bp, ptr %c) #0 {
; VBITS_GE_256-LABEL: masked_load_zext_sgt_v8i32i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: sub sp, sp, #16
; VBITS_GE_256-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_256-NEXT: ptrue p1.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_256-NEXT: cmpgt p0.s, p1/z, z0.s, #0
; VBITS_GE_256-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p0.s
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT: umov w8, v0.b[0]
; VBITS_GE_256-NEXT: umov w9, v0.b[1]
; VBITS_GE_256-NEXT: umov w10, v0.b[2]
; VBITS_GE_256-NEXT: and w8, w8, #0x1
; VBITS_GE_256-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[3]
; VBITS_GE_256-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[4]
; VBITS_GE_256-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[5]
; VBITS_GE_256-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_256-NEXT: umov w10, v0.b[6]
; VBITS_GE_256-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_256-NEXT: umov w9, v0.b[7]
; VBITS_GE_256-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_256-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_256-NEXT: and w8, w9, #0xff
; VBITS_GE_256-NEXT: tbz w9, #0, .LBB50_2
; VBITS_GE_256-NEXT: // %bb.1: // %cond.load
; VBITS_GE_256-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: add x0, x0, #4
; VBITS_GE_256-NEXT: tbnz w8, #1, .LBB50_3
; VBITS_GE_256-NEXT: b .LBB50_4
; VBITS_GE_256-NEXT: .LBB50_2:
; VBITS_GE_256-NEXT: adrp x9, .LCPI50_0
; VBITS_GE_256-NEXT: add x9, x9, :lo12:.LCPI50_0
; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_256-NEXT: tbz w8, #1, .LBB50_4
; VBITS_GE_256-NEXT: .LBB50_3: // %cond.load1
; VBITS_GE_256-NEXT: mov w9, #1 // =0x1
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: .LBB50_4: // %else2
; VBITS_GE_256-NEXT: tbnz w8, #2, .LBB50_12
; VBITS_GE_256-NEXT: // %bb.5: // %else6
; VBITS_GE_256-NEXT: tbnz w8, #3, .LBB50_13
; VBITS_GE_256-NEXT: .LBB50_6: // %else10
; VBITS_GE_256-NEXT: tbnz w8, #4, .LBB50_14
; VBITS_GE_256-NEXT: .LBB50_7: // %else14
; VBITS_GE_256-NEXT: tbnz w8, #5, .LBB50_15
; VBITS_GE_256-NEXT: .LBB50_8: // %else18
; VBITS_GE_256-NEXT: tbnz w8, #6, .LBB50_16
; VBITS_GE_256-NEXT: .LBB50_9: // %else22
; VBITS_GE_256-NEXT: tbz w8, #7, .LBB50_11
; VBITS_GE_256-NEXT: .LBB50_10: // %cond.load25
; VBITS_GE_256-NEXT: mov w8, #7 // =0x7
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w8
; VBITS_GE_256-NEXT: ldr w8, [x0]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_256-NEXT: .LBB50_11: // %else26
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT: add sp, sp, #16
; VBITS_GE_256-NEXT: ret
; VBITS_GE_256-NEXT: .LBB50_12: // %cond.load5
; VBITS_GE_256-NEXT: mov w9, #2 // =0x2
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #3, .LBB50_6
; VBITS_GE_256-NEXT: .LBB50_13: // %cond.load9
; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #4, .LBB50_7
; VBITS_GE_256-NEXT: .LBB50_14: // %cond.load13
; VBITS_GE_256-NEXT: mov w9, #4 // =0x4
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #5, .LBB50_8
; VBITS_GE_256-NEXT: .LBB50_15: // %cond.load17
; VBITS_GE_256-NEXT: mov w9, #5 // =0x5
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbz w8, #6, .LBB50_9
; VBITS_GE_256-NEXT: .LBB50_16: // %cond.load21
; VBITS_GE_256-NEXT: mov w9, #6 // =0x6
; VBITS_GE_256-NEXT: index z1.s, #0, #1
; VBITS_GE_256-NEXT: mov z2.s, w9
; VBITS_GE_256-NEXT: ldr w9, [x0], #4
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_256-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_256-NEXT: tbnz w8, #7, .LBB50_10
; VBITS_GE_256-NEXT: b .LBB50_11
;
; VBITS_GE_512-LABEL: masked_load_zext_sgt_v8i32i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: sub sp, sp, #16
; VBITS_GE_512-NEXT: .cfi_def_cfa_offset 16
; VBITS_GE_512-NEXT: ptrue p1.s, vl8
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x1]
; VBITS_GE_512-NEXT: cmpgt p0.s, p1/z, z0.s, #0
; VBITS_GE_512-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT: umov w8, v0.b[0]
; VBITS_GE_512-NEXT: umov w9, v0.b[1]
; VBITS_GE_512-NEXT: umov w10, v0.b[2]
; VBITS_GE_512-NEXT: and w8, w8, #0x1
; VBITS_GE_512-NEXT: bfi w8, w9, #1, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[3]
; VBITS_GE_512-NEXT: bfi w8, w10, #2, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[4]
; VBITS_GE_512-NEXT: bfi w8, w9, #3, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[5]
; VBITS_GE_512-NEXT: bfi w8, w10, #4, #1
; VBITS_GE_512-NEXT: umov w10, v0.b[6]
; VBITS_GE_512-NEXT: bfi w8, w9, #5, #1
; VBITS_GE_512-NEXT: umov w9, v0.b[7]
; VBITS_GE_512-NEXT: bfi w8, w10, #6, #1
; VBITS_GE_512-NEXT: orr w9, w8, w9, lsl #7
; VBITS_GE_512-NEXT: and w8, w9, #0xff
; VBITS_GE_512-NEXT: tbz w9, #0, .LBB50_2
; VBITS_GE_512-NEXT: // %bb.1: // %cond.load
; VBITS_GE_512-NEXT: ld1rw { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: add x0, x0, #4
; VBITS_GE_512-NEXT: tbnz w8, #1, .LBB50_3
; VBITS_GE_512-NEXT: b .LBB50_4
; VBITS_GE_512-NEXT: .LBB50_2:
; VBITS_GE_512-NEXT: adrp x9, .LCPI50_0
; VBITS_GE_512-NEXT: add x9, x9, :lo12:.LCPI50_0
; VBITS_GE_512-NEXT: ld1w { z0.s }, p1/z, [x9]
; VBITS_GE_512-NEXT: tbz w8, #1, .LBB50_4
; VBITS_GE_512-NEXT: .LBB50_3: // %cond.load1
; VBITS_GE_512-NEXT: mov w9, #1 // =0x1
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: .LBB50_4: // %else2
; VBITS_GE_512-NEXT: tbnz w8, #2, .LBB50_12
; VBITS_GE_512-NEXT: // %bb.5: // %else6
; VBITS_GE_512-NEXT: tbnz w8, #3, .LBB50_13
; VBITS_GE_512-NEXT: .LBB50_6: // %else10
; VBITS_GE_512-NEXT: tbnz w8, #4, .LBB50_14
; VBITS_GE_512-NEXT: .LBB50_7: // %else14
; VBITS_GE_512-NEXT: tbnz w8, #5, .LBB50_15
; VBITS_GE_512-NEXT: .LBB50_8: // %else18
; VBITS_GE_512-NEXT: tbnz w8, #6, .LBB50_16
; VBITS_GE_512-NEXT: .LBB50_9: // %else22
; VBITS_GE_512-NEXT: tbz w8, #7, .LBB50_11
; VBITS_GE_512-NEXT: .LBB50_10: // %cond.load25
; VBITS_GE_512-NEXT: mov w8, #7 // =0x7
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w8
; VBITS_GE_512-NEXT: ldr w8, [x0]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w8
; VBITS_GE_512-NEXT: .LBB50_11: // %else26
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: add sp, sp, #16
; VBITS_GE_512-NEXT: ret
; VBITS_GE_512-NEXT: .LBB50_12: // %cond.load5
; VBITS_GE_512-NEXT: mov w9, #2 // =0x2
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #3, .LBB50_6
; VBITS_GE_512-NEXT: .LBB50_13: // %cond.load9
; VBITS_GE_512-NEXT: mov w9, #3 // =0x3
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #4, .LBB50_7
; VBITS_GE_512-NEXT: .LBB50_14: // %cond.load13
; VBITS_GE_512-NEXT: mov w9, #4 // =0x4
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #5, .LBB50_8
; VBITS_GE_512-NEXT: .LBB50_15: // %cond.load17
; VBITS_GE_512-NEXT: mov w9, #5 // =0x5
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbz w8, #6, .LBB50_9
; VBITS_GE_512-NEXT: .LBB50_16: // %cond.load21
; VBITS_GE_512-NEXT: mov w9, #6 // =0x6
; VBITS_GE_512-NEXT: index z1.s, #0, #1
; VBITS_GE_512-NEXT: mov z2.s, w9
; VBITS_GE_512-NEXT: ldr w9, [x0], #4
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s
; VBITS_GE_512-NEXT: mov z0.s, p1/m, w9
; VBITS_GE_512-NEXT: tbnz w8, #7, .LBB50_10
; VBITS_GE_512-NEXT: b .LBB50_11
;
; CHECK-EXPAND-LABEL: masked_load_zext_sgt_v8i32i64:
; CHECK-EXPAND: // %bb.0:
; CHECK-EXPAND-NEXT: ptrue p0.s, vl8
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x1]
; CHECK-EXPAND-NEXT: cmpgt p1.s, p0/z, z0.s, #0
; CHECK-EXPAND-NEXT: cntp x8, p1, p1.s
; CHECK-EXPAND-NEXT: whilelo p0.s, xzr, x8
; CHECK-EXPAND-NEXT: mov x8, #4 // =0x4
; CHECK-EXPAND-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-EXPAND-NEXT: ptrue p0.d, vl4
; CHECK-EXPAND-NEXT: expand z0.s, p1, z0.s
; CHECK-EXPAND-NEXT: movprfx z1, z0
; CHECK-EXPAND-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-EXPAND-NEXT: uunpklo z0.d, z0.s
; CHECK-EXPAND-NEXT: uunpklo z1.d, z1.s
; CHECK-EXPAND-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-EXPAND-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
; CHECK-EXPAND-NEXT: ret
%b = load <8 x i32>, ptr %bp
%mask = icmp sgt <8 x i32> %b, zeroinitializer
%load = call <8 x i32> @llvm.masked.expandload.v8i32(ptr %ap, <8 x i1> %mask, <8 x i32> poison)
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, ptr %c
ret void
}
; Attribute group #0: sets the "target-features" string to "+sve".
; Function definitions in this file opt in by naming the group after their
; signature (e.g. `... vscale_range(2,0) #0 {`).
attributes #0 = { "target-features"="+sve" }