; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mattr=+sve < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Load a <vscale x 1 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .b elements, predicated by a
; whilelo whose bound is (rdvl >> 4).
define void @sve_load_store_nxv1i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i8>, ptr %a
store <vscale x 1 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 2 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .d elements under ptrue p0.d.
define void @sve_load_store_nxv2i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i8>, ptr %a
store <vscale x 2 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 3 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .s elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 3.
define void @sve_load_store_nxv3i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i8>, ptr %a
store <vscale x 3 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 4 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .s elements under ptrue p0.s.
define void @sve_load_store_nxv4i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i8>, ptr %a
store <vscale x 4 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 5 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .h elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 5.
define void @sve_load_store_nxv5i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x i8>, ptr %a
store <vscale x 5 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 6 x i8> from %a and store it to %b.
; Expected code: the value is handled in two ptrue-predicated pieces, ld1b/st1b
; on .s elements at offset zero and on .d elements at [.., #2, mul vl], with
; uzp1/uunpk shuffles between the loads and the second store.
define void @sve_load_store_nxv6i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1b { z1.s }, p1, [x1]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1b { z0.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x i8>, ptr %a
store <vscale x 6 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 7 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .h elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 7.
define void @sve_load_store_nxv7i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x i8>, ptr %a
store <vscale x 7 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 8 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .h elements under ptrue p0.h.
define void @sve_load_store_nxv8i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i8>, ptr %a
store <vscale x 8 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 9 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .b elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 9.
define void @sve_load_store_nxv9i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv9i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 9 x i8>, ptr %a
store <vscale x 9 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 10 x i8> from %a and store it to %b.
; Expected code: the value is handled in two ptrue-predicated pieces, ld1b/st1b
; on .h elements at offset zero and on .d elements at [.., #4, mul vl], with
; uzp1/uunpk shuffles between the loads and the stores.
define void @sve_load_store_nxv10i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv10i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: st1b { z0.h }, p1, [x1]
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1b { z1.d }, p0, [x1, #4, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 10 x i8>, ptr %a
store <vscale x 10 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 11 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .b elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 11.
define void @sve_load_store_nxv11i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv11i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 11 x i8>, ptr %a
store <vscale x 11 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 12 x i8> from %a and store it to %b.
; Expected code: the value is handled in two ptrue-predicated pieces, ld1b/st1b
; on .h elements at offset zero and on .s elements at [.., #2, mul vl], with
; uzp1/uunpk shuffles between the loads and the stores.
define void @sve_load_store_nxv12i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: st1b { z0.h }, p1, [x1]
; CHECK-NEXT: st1b { z1.s }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 12 x i8>, ptr %a
store <vscale x 12 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 13 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .b elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 13.
define void @sve_load_store_nxv13i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv13i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 13 x i8>, ptr %a
store <vscale x 13 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 14 x i8> from %a and store it to %b.
; Expected code: the value is handled in three ptrue-predicated pieces,
; ld1b/st1b on .h elements at offset zero, on .s elements at [.., #2, mul vl]
; and on .d elements at [.., #6, mul vl], with uzp1/uunpk shuffles between the
; loads and the stores.
define void @sve_load_store_nxv14i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv14i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ld1b { z1.h }, p2/z, [x0]
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: st1b { z0.h }, p2, [x1]
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: st1b { z1.s }, p1, [x1, #2, mul vl]
; CHECK-NEXT: st1b { z2.d }, p0, [x1, #6, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 14 x i8>, ptr %a
store <vscale x 14 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 15 x i8> from %a and store it to %b.
; Expected code: a single ld1b/st1b pair on .b elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 15.
define void @sve_load_store_nxv15i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv15i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 15 x i8>, ptr %a
store <vscale x 15 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 16 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register.
define void @sve_load_store_nxv16i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 16 x i8>, ptr %a
store <vscale x 16 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 17 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 17; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv17i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv17i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #17 // =0x11
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 17 x i8>, ptr %a
store <vscale x 17 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 18 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.d ld1b/st1b on .d elements at register offset x8 (rdvl #1),
; with a long chain of uzp1/uunpk shuffles between that load and its store.
define void @sve_load_store_nxv18i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv18i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uzp1 z1.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uunpkhi z2.d, z2.s
; CHECK-NEXT: uzp1 z2.s, z0.s, z2.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpkhi z2.d, z2.s
; CHECK-NEXT: uzp1 z2.s, z0.s, z2.s
; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1b { z0.d }, p0, [x1, x8]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 18 x i8>, ptr %a
store <vscale x 18 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 19 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 19; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv19i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv19i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #19 // =0x13
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 19 x i8>, ptr %a
store <vscale x 19 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 20 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.s ld1b/st1b on .s elements at [.., #4, mul vl], with a chain
; of uzp1/uunpk shuffles between that load and its store.
define void @sve_load_store_nxv20i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv20i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 20 x i8>, ptr %a
store <vscale x 20 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 21 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 21; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv21i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv21i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #21 // =0x15
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 21 x i8>, ptr %a
store <vscale x 21 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 22 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; a ptrue p0.s ld1b/st1b on .s elements at [.., #4, mul vl], and a ptrue p1.d
; ld1b/st1b on .d elements at register offset x8 (cntw, mul #5), with a chain
; of uzp1/uunpk shuffles between the loads and the stores.
define void @sve_load_store_nxv22i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv22i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cntw x8, all, mul #5
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: ld1b { z1.d }, p1/z, [x0, x8]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1b { z1.d }, p1, [x1, x8]
; CHECK-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 22 x i8>, ptr %a
store <vscale x 22 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 23 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 23; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv23i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv23i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #23 // =0x17
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 23 x i8>, ptr %a
store <vscale x 23 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 24 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.h ld1b/st1b on .h elements at [.., #2, mul vl], with a
; uzp1/uunpklo pair between that load and its store.
define void @sve_load_store_nxv24i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv24i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: st1b { z0.h }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 24 x i8>, ptr %a
store <vscale x 24 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 25 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 25; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv25i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv25i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #25 // =0x19
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 25 x i8>, ptr %a
store <vscale x 25 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 26 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; a ptrue p1.h ld1b/st1b on .h elements at [.., #2, mul vl], and a ptrue p0.d
; ld1b/st1b on .d elements at register offset x8 (cnth, mul #3), with
; uzp1/uunpk shuffles between the loads and the stores.
define void @sve_load_store_nxv26i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv26i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cnth x8, all, mul #3
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1b { z1.d }, p0, [x1, x8]
; CHECK-NEXT: st1b { z0.h }, p1, [x1, #2, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 26 x i8>, ptr %a
store <vscale x 26 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 27 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 27; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv27i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv27i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #27 // =0x1b
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 27 x i8>, ptr %a
store <vscale x 27 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 28 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; a ptrue p1.h ld1b/st1b on .h elements at [.., #2, mul vl], and a ptrue p0.s
; ld1b/st1b on .s elements at [.., #6, mul vl], with uzp1/uunpk shuffles
; between the loads and the stores.
define void @sve_load_store_nxv28i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv28i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: st1b { z0.h }, p1, [x1, #2, mul vl]
; CHECK-NEXT: st1b { z1.s }, p0, [x1, #6, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 28 x i8>, ptr %a
store <vscale x 28 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 29 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 29; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv29i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv29i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #29 // =0x1d
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 29 x i8>, ptr %a
store <vscale x 29 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 30 x i8> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; a ptrue p2.h ld1b/st1b on .h elements at [.., #2, mul vl], a ptrue p1.s
; ld1b/st1b on .s elements at [.., #6, mul vl], and a ptrue p0.d ld1b/st1b on
; .d elements at register offset x8 (cntw, mul #7), with uzp1/uunpk shuffles
; between the loads and the stores.
define void @sve_load_store_nxv30i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv30i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntw x8, all, mul #7
; CHECK-NEXT: ldr z3, [x0]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x8]
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0, #6, mul vl]
; CHECK-NEXT: ld1b { z2.h }, p2/z, [x0, #2, mul vl]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z2.b, z0.b
; CHECK-NEXT: uunpkhi z1.h, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: st1b { z2.d }, p0, [x1, x8]
; CHECK-NEXT: st1b { z0.h }, p2, [x1, #2, mul vl]
; CHECK-NEXT: st1b { z1.s }, p1, [x1, #6, mul vl]
; CHECK-NEXT: str z3, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 30 x i8>, ptr %a
store <vscale x 30 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 31 x i8> from %a and store it to %b.
; Expected code: two ld1b/st1b pairs on .b elements, each predicated by a
; whilelo with bound (rdvl >> 4) * 31; the predicate for the [.., #1, mul vl]
; pair starts counting at rdvl, the other at zero.
define void @sve_load_store_nxv31i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv31i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w10, #31 // =0x1f
; CHECK-NEXT: lsr x9, x8, #4
; CHECK-NEXT: mul x9, x9, x10
; CHECK-NEXT: whilelo p0.b, x8, x9
; CHECK-NEXT: whilelo p1.b, xzr, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0]
; CHECK-NEXT: st1b { z0.b }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 31 x i8>, ptr %a
store <vscale x 31 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 32 x i8> from %a and store it to %b.
; Expected code: unpredicated ldr/str of two Z registers, at offset zero and
; at [.., #1, mul vl].
define void @sve_load_store_nxv32i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 32 x i8>, ptr %a
store <vscale x 32 x i8> %c, ptr %b
ret void
}
; Load a <vscale x 1 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .h elements, predicated by a
; whilelo whose bound is (rdvl >> 4).
define void @sve_load_store_nxv1i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i16>, ptr %a
store <vscale x 1 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 2 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .d elements under ptrue p0.d.
define void @sve_load_store_nxv2i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i16>, ptr %a
store <vscale x 2 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 3 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .s elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 3.
define void @sve_load_store_nxv3i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i16>, ptr %a
store <vscale x 3 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 4 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .s elements under ptrue p0.s.
define void @sve_load_store_nxv4i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i16>, ptr %a
store <vscale x 4 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 5 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .h elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 5.
define void @sve_load_store_nxv5i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x i16>, ptr %a
store <vscale x 5 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 6 x i16> from %a and store it to %b.
; Expected code: the value is handled in two ptrue-predicated pieces, ld1h/st1h
; on .s elements at offset zero and on .d elements at [.., #2, mul vl], with
; uzp1/uunpk shuffles between the loads and the stores.
define void @sve_load_store_nxv6i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1h { z0.s }, p1, [x1]
; CHECK-NEXT: st1h { z1.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x i16>, ptr %a
store <vscale x 6 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 7 x i16> from %a and store it to %b.
; Expected code: a single ld1h/st1h pair on .h elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 7.
define void @sve_load_store_nxv7i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x i16>, ptr %a
store <vscale x 7 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 8 x i16> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register.
define void @sve_load_store_nxv8i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i16>, ptr %a
store <vscale x 8 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 9 x i16> from %a and store it to %b.
; Expected code: one whilelo predicate on .b elements with bound
; (rdvl >> 4) * 9, split by punpkhi/punpklo to predicate two ld1h/st1h pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv9i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv9i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 9 x i16>, ptr %a
store <vscale x 9 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 10 x i16> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.d ld1h/st1h on .d elements at [.., #4, mul vl], with a chain
; of uzp1/uunpk shuffles between that load and its store.
define void @sve_load_store_nxv10i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv10i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uzp1 z1.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z1.h, z0.h, z1.h
; CHECK-NEXT: uunpkhi z1.s, z1.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1, #4, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 10 x i16>, ptr %a
store <vscale x 10 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 11 x i16> from %a and store it to %b.
; Expected code: one whilelo predicate on .b elements with bound
; (rdvl >> 4) * 11, split by punpkhi/punpklo to predicate two ld1h/st1h pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv11i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv11i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 11 x i16>, ptr %a
store <vscale x 11 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 12 x i16> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.s ld1h/st1h on .s elements at [.., #2, mul vl], with a
; uzp1/uunpklo pair between that load and its store.
define void @sve_load_store_nxv12i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv12i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: st1h { z0.s }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 12 x i16>, ptr %a
store <vscale x 12 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 13 x i16> from %a and store it to %b.
; Expected code: one whilelo predicate on .b elements with bound
; (rdvl >> 4) * 13, split by punpkhi/punpklo to predicate two ld1h/st1h pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv13i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv13i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 13 x i16>, ptr %a
store <vscale x 13 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 14 x i16> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; a ptrue p1.s ld1h/st1h on .s elements at [.., #2, mul vl], and a ptrue p0.d
; ld1h/st1h on .d elements at [.., #6, mul vl], with uzp1/uunpk shuffles
; between the loads and the stores.
define void @sve_load_store_nxv14i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv14i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1h { z0.s }, p1, [x1, #2, mul vl]
; CHECK-NEXT: st1h { z1.d }, p0, [x1, #6, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 14 x i16>, ptr %a
store <vscale x 14 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 15 x i16> from %a and store it to %b.
; Expected code: one whilelo predicate on .b elements with bound
; (rdvl >> 4) * 15, split by punpkhi/punpklo to predicate two ld1h/st1h pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv15i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv15i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 15 x i16>, ptr %a
store <vscale x 15 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 16 x i16> from %a and store it to %b.
; Expected code: unpredicated ldr/str of two Z registers, at offset zero and
; at [.., #1, mul vl].
define void @sve_load_store_nxv16i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 16 x i16>, ptr %a
store <vscale x 16 x i16> %c, ptr %b
ret void
}
; Load a <vscale x 1 x i32> from %a and store it to %b.
; Expected code: a single ld1w/st1w pair on .s elements, predicated by a
; whilelo whose bound is (rdvl >> 4).
define void @sve_load_store_nxv1i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i32>, ptr %a
store <vscale x 1 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 2 x i32> from %a and store it to %b.
; Expected code: a single ld1w/st1w pair on .d elements under ptrue p0.d.
define void @sve_load_store_nxv2i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i32>, ptr %a
store <vscale x 2 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 3 x i32> from %a and store it to %b.
; Expected code: a single ld1w/st1w pair on .s elements, predicated by a
; whilelo whose bound is (rdvl >> 4) * 3.
define void @sve_load_store_nxv3i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i32>, ptr %a
store <vscale x 3 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 4 x i32> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register.
define void @sve_load_store_nxv4i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i32>, ptr %a
store <vscale x 4 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 5 x i32> from %a and store it to %b.
; Expected code: one whilelo predicate on .h elements with bound
; (rdvl >> 4) * 5, split by punpkhi/punpklo to predicate two ld1w/st1w pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv5i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x i32>, ptr %a
store <vscale x 5 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 6 x i32> from %a and store it to %b.
; Expected code: an unpredicated ldr/str of one Z register at offset zero,
; plus a ptrue p0.d ld1w/st1w on .d elements at [.., #2, mul vl], with a
; uzp1/uunpklo pair between that load and its store.
define void @sve_load_store_nxv6i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1w { z0.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x i32>, ptr %a
store <vscale x 6 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 7 x i32> from %a and store it to %b.
; Expected code: one whilelo predicate on .h elements with bound
; (rdvl >> 4) * 7, split by punpkhi/punpklo to predicate two ld1w/st1w pairs
; at [.., #1, mul vl] and at offset zero.
define void @sve_load_store_nxv7i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x i32>, ptr %a
store <vscale x 7 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 8 x i32> from %a and store it to %b.
; Expected code: unpredicated ldr/str of two Z registers, at offset zero and
; at [.., #1, mul vl].
define void @sve_load_store_nxv8i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i32>, ptr %a
store <vscale x 8 x i32> %c, ptr %b
ret void
}
; Load a <vscale x 1 x i64> from %a and store it to %b.
; Expected code: a single ld1d/st1d pair on .d elements, predicated by a
; whilelo whose bound is (rdvl >> 4).
define void @sve_load_store_nxv1i64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i64>, ptr %a
store <vscale x 1 x i64> %c, ptr %b
ret void
}
; Copies a <vscale x 2 x i64> from %a to %b.
; Expected lowering: a single unpredicated ldr/str pair.
define void @sve_load_store_nxv2i64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i64>, ptr %a
store <vscale x 2 x i64> %c, ptr %b
ret void
}
; Copies a <vscale x 3 x i64> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes,
; split with punpklo/punpkhi to guard two ld1d/st1d pairs.
define void @sve_load_store_nxv3i64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i64>, ptr %a
store <vscale x 3 x i64> %c, ptr %b
ret void
}
; Copies a <vscale x 4 x i64> from %a to %b.
; Expected lowering: two unpredicated ldr/str pairs (offsets #0 and #1, mul vl).
define void @sve_load_store_nxv4i64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i64>, ptr %a
store <vscale x 4 x i64> %c, ptr %b
ret void
}
; Copies a <vscale x 1 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) .d lanes guarding
; a single ld1h/st1h pair on .d elements.
define void @sve_load_store_nxv1f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x half>, ptr %a
store <vscale x 1 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 2 x half> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .d elements.
define void @sve_load_store_nxv2f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x half>, ptr %a
store <vscale x 2 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 3 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes
; guarding a single ld1h/st1h pair on .s elements.
define void @sve_load_store_nxv3f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x half>, ptr %a
store <vscale x 3 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 4 x half> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .s elements.
define void @sve_load_store_nxv4f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x half>, ptr %a
store <vscale x 4 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 5 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 5 .h lanes
; guarding a single ld1h/st1h pair on .h elements.
define void @sve_load_store_nxv5f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x half>, ptr %a
store <vscale x 5 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 6 x half> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .s elements at
; offset #0 and one on .d elements at offset #2, mul vl.
; NOTE(review): the tail value goes through a uzp1/uzp1/uunpkhi/uunpklo
; sequence between its load and its store; this looks like a pack/unpack
; round-trip that could be elided - confirm.
define void @sve_load_store_nxv6f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z1.s }, p1, [x1]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x half>, ptr %a
store <vscale x 6 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 7 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 7 .h lanes
; guarding a single ld1h/st1h pair on .h elements.
define void @sve_load_store_nxv7f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x half>, ptr %a
store <vscale x 7 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 8 x half> from %a to %b.
; Expected lowering: a single unpredicated ldr/str pair.
define void @sve_load_store_nxv8f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x half>, ptr %a
store <vscale x 8 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 9 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 9 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv9f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv9f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 9 x half>, ptr %a
store <vscale x 9 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 10 x half> from %a to %b.
; Expected lowering: one full-register ldr/str pair plus a ptrue-predicated
; ld1h/st1h pair on .d elements at offset #4, mul vl.
define void @sve_load_store_nxv10f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv10f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: st1h { z1.d }, p0, [x1, #4, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 10 x half>, ptr %a
store <vscale x 10 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 11 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 11 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv11f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv11f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 11 x half>, ptr %a
store <vscale x 11 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 12 x half> from %a to %b.
; Expected lowering: one full-register ldr/str pair plus a ptrue-predicated
; ld1h/st1h pair on .s elements at offset #2, mul vl.
define void @sve_load_store_nxv12f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv12f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: st1h { z1.s }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 12 x half>, ptr %a
store <vscale x 12 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 13 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 13 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv13f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv13f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 13 x half>, ptr %a
store <vscale x 13 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 14 x half> from %a to %b.
; Expected lowering: one full-register ldr/str pair, a ptrue-predicated
; ld1h/st1h pair on .s elements at offset #2, mul vl, and one on .d elements
; at offset #6, mul vl.
; NOTE(review): the .d tail goes through a uzp1/uzp1/uunpkhi/uunpklo sequence
; between its load and its store; this looks like a pack/unpack round-trip
; that could be elided - confirm.
define void @sve_load_store_nxv14f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv14f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z1.s }, p1, [x1, #2, mul vl]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1, #6, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 14 x half>, ptr %a
store <vscale x 14 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 15 x half> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 15 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv15f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv15f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 15 x half>, ptr %a
store <vscale x 15 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 16 x half> from %a to %b.
; Expected lowering: two unpredicated ldr/str pairs (offsets #0 and #1, mul vl).
define void @sve_load_store_nxv16f16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 16 x half>, ptr %a
store <vscale x 16 x half> %c, ptr %b
ret void
}
; Copies a <vscale x 1 x float> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) .d lanes guarding
; a single ld1w/st1w pair on .d elements.
define void @sve_load_store_nxv1f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x float>, ptr %a
store <vscale x 1 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 2 x float> from %a to %b.
; Expected lowering: a ptrue-predicated ld1w/st1w pair on .d elements.
define void @sve_load_store_nxv2f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x float>, ptr %a
store <vscale x 2 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 3 x float> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes
; guarding a single ld1w/st1w pair on .s elements.
define void @sve_load_store_nxv3f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x float>, ptr %a
store <vscale x 3 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 4 x float> from %a to %b.
; Expected lowering: a single unpredicated ldr/str pair.
define void @sve_load_store_nxv4f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x float>, ptr %a
store <vscale x 4 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 5 x float> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 5 .h lanes,
; split with punpklo/punpkhi to guard two ld1w/st1w pairs on .s elements.
define void @sve_load_store_nxv5f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x float>, ptr %a
store <vscale x 5 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 6 x float> from %a to %b.
; Expected lowering: one full-register ldr/str pair plus a ptrue-predicated
; ld1w/st1w pair on .d elements at offset #2, mul vl.
define void @sve_load_store_nxv6f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: st1w { z1.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x float>, ptr %a
store <vscale x 6 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 7 x float> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 7 .h lanes,
; split with punpklo/punpkhi to guard two ld1w/st1w pairs on .s elements.
define void @sve_load_store_nxv7f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x float>, ptr %a
store <vscale x 7 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 8 x float> from %a to %b.
; Expected lowering: two unpredicated ldr/str pairs (offsets #0 and #1, mul vl).
define void @sve_load_store_nxv8f32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x float>, ptr %a
store <vscale x 8 x float> %c, ptr %b
ret void
}
; Copies a <vscale x 1 x double> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) .d lanes guarding
; a single ld1d/st1d pair.
define void @sve_load_store_nxv1f64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x double>, ptr %a
store <vscale x 1 x double> %c, ptr %b
ret void
}
; Copies a <vscale x 2 x double> from %a to %b.
; Expected lowering: a single unpredicated ldr/str pair.
define void @sve_load_store_nxv2f64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x double>, ptr %a
store <vscale x 2 x double> %c, ptr %b
ret void
}
; Copies a <vscale x 3 x double> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes,
; split with punpklo/punpkhi to guard two ld1d/st1d pairs.
define void @sve_load_store_nxv3f64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3f64:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x double>, ptr %a
store <vscale x 3 x double> %c, ptr %b
ret void
}
; Copies a <vscale x 4 x double> from %a to %b.
; Expected lowering: two unpredicated ldr/str pairs (offsets #0 and #1, mul vl).
define void @sve_load_store_nxv4f64(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x double>, ptr %a
store <vscale x 4 x double> %c, ptr %b
ret void
}
; Copies a <vscale x 1 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) .d lanes guarding
; a single ld1h/st1h pair on .d elements.
define void @sve_load_store_nxv1bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv1bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 1 x bfloat>, ptr %a
store <vscale x 1 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 2 x bfloat> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .d elements.
define void @sve_load_store_nxv2bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 2 x bfloat>, ptr %a
store <vscale x 2 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 3 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes
; guarding a single ld1h/st1h pair on .s elements.
define void @sve_load_store_nxv3bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv3bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 3 x bfloat>, ptr %a
store <vscale x 3 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 4 x bfloat> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .s elements.
define void @sve_load_store_nxv4bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 4 x bfloat>, ptr %a
store <vscale x 4 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 5 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 5 .h lanes
; guarding a single ld1h/st1h pair on .h elements.
define void @sve_load_store_nxv5bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv5bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 5 x bfloat>, ptr %a
store <vscale x 5 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 6 x bfloat> from %a to %b.
; Expected lowering: a ptrue-predicated ld1h/st1h pair on .s elements at
; offset #0 and one on .d elements at offset #2, mul vl.
; NOTE(review): the tail value goes through a uzp1/uzp1/uunpkhi/uunpklo
; sequence between its load and its store; this looks like a pack/unpack
; round-trip that could be elided - confirm.
define void @sve_load_store_nxv6bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv6bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z1.s }, p1, [x1]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 6 x bfloat>, ptr %a
store <vscale x 6 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 7 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 7 .h lanes
; guarding a single ld1h/st1h pair on .h elements.
define void @sve_load_store_nxv7bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv7bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 7 x bfloat>, ptr %a
store <vscale x 7 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 8 x bfloat> from %a to %b.
; Expected lowering: a single unpredicated ldr/str pair.
define void @sve_load_store_nxv8bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 8 x bfloat>, ptr %a
store <vscale x 8 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 9 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 9 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv9bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv9bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 9 x bfloat>, ptr %a
store <vscale x 9 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 10 x bfloat> from %a to %b.
; Expected lowering: one full-register ldr/str pair plus a ptrue-predicated
; ld1h/st1h pair on .d elements at offset #4, mul vl.
define void @sve_load_store_nxv10bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv10bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: st1h { z1.d }, p0, [x1, #4, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 10 x bfloat>, ptr %a
store <vscale x 10 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 11 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 11 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv11bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv11bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 11 x bfloat>, ptr %a
store <vscale x 11 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 12 x bfloat> from %a to %b.
; Expected lowering: one full-register ldr/str pair plus a ptrue-predicated
; ld1h/st1h pair on .s elements at offset #2, mul vl.
define void @sve_load_store_nxv12bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv12bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: st1h { z1.s }, p0, [x1, #2, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 12 x bfloat>, ptr %a
store <vscale x 12 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 13 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 13 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv13bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv13bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 13 x bfloat>, ptr %a
store <vscale x 13 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 14 x bfloat> from %a to %b.
; Expected lowering: one full-register ldr/str pair, a ptrue-predicated
; ld1h/st1h pair on .s elements at offset #2, mul vl, and one on .d elements
; at offset #6, mul vl.
; NOTE(review): the .d tail goes through a uzp1/uzp1/uunpkhi/uunpklo sequence
; between its load and its store; this looks like a pack/unpack round-trip
; that could be elided - confirm.
define void @sve_load_store_nxv14bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv14bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr z2, [x0]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, #2, mul vl]
; CHECK-NEXT: str z2, [x1]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z1.s }, p1, [x1, #2, mul vl]
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1, #6, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 14 x bfloat>, ptr %a
store <vscale x 14 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 15 x bfloat> from %a to %b.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 15 .b lanes,
; split with punpklo/punpkhi to guard two ld1h/st1h pairs on .h elements.
define void @sve_load_store_nxv15bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv15bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [x1, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 15 x bfloat>, ptr %a
store <vscale x 15 x bfloat> %c, ptr %b
ret void
}
; Copies a <vscale x 16 x bfloat> from %a to %b.
; Expected lowering: two unpredicated ldr/str pairs (offsets #0 and #1, mul vl).
define void @sve_load_store_nxv16bf16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_load_store_nxv16bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z0, [x1, #1, mul vl]
; CHECK-NEXT: str z1, [x1]
; CHECK-NEXT: ret
%c = load <vscale x 16 x bfloat>, ptr %a
store <vscale x 16 x bfloat> %c, ptr %b
ret void
}
; Loads a <vscale x 1 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) .h lanes guarding
; a single ld1sb into .h elements.
define <vscale x 1 x i16> @sve_sextload_nxv1i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i8>, ptr %a
%c.sext = sext <vscale x 1 x i8> %c to <vscale x 1 x i16>
ret <vscale x 1 x i16> %c.sext
}
; Loads a <vscale x 2 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a ptrue-predicated ld1sb into .d elements.
define <vscale x 2 x i16> @sve_sextload_nxv2i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i8>, ptr %a
%c.sext = sext <vscale x 2 x i8> %c to <vscale x 2 x i16>
ret <vscale x 2 x i16> %c.sext
}
; Loads a <vscale x 3 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 3 .s lanes
; guarding a single ld1sb into .s elements.
define <vscale x 3 x i16> @sve_sextload_nxv3i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv3i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i8>, ptr %a
%c.sext = sext <vscale x 3 x i8> %c to <vscale x 3 x i16>
ret <vscale x 3 x i16> %c.sext
}
; Loads a <vscale x 4 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a ptrue-predicated ld1sb into .s elements.
define <vscale x 4 x i16> @sve_sextload_nxv4i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i8>, ptr %a
%c.sext = sext <vscale x 4 x i8> %c to <vscale x 4 x i16>
ret <vscale x 4 x i16> %c.sext
}
; Loads a <vscale x 5 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 5 .h lanes
; guarding a single ld1sb into .h elements.
define <vscale x 5 x i16> @sve_sextload_nxv5i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv5i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 5 x i8>, ptr %a
%c.sext = sext <vscale x 5 x i8> %c to <vscale x 5 x i16>
ret <vscale x 5 x i16> %c.sext
}
; Loads a <vscale x 6 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: the lane count comes from a single cntd ..., mul #3
; (no rdvl/mul sequence) and feeds a whilelo over .h lanes guarding one ld1sb.
define <vscale x 6 x i16> @sve_sextload_nxv6i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv6i8:
; CHECK: // %bb.0:
; CHECK-NEXT: cntd x8, all, mul #3
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 6 x i8>, ptr %a
%c.sext = sext <vscale x 6 x i8> %c to <vscale x 6 x i16>
ret <vscale x 6 x i16> %c.sext
}
; Loads a <vscale x 7 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 7 .h lanes
; guarding a single ld1sb into .h elements.
define <vscale x 7 x i16> @sve_sextload_nxv7i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv7i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 7 x i8>, ptr %a
%c.sext = sext <vscale x 7 x i8> %c to <vscale x 7 x i16>
ret <vscale x 7 x i16> %c.sext
}
; Loads a <vscale x 8 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a ptrue-predicated ld1sb into .h elements.
define <vscale x 8 x i16> @sve_sextload_nxv8i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i8>, ptr %a
%c.sext = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
ret <vscale x 8 x i16> %c.sext
}
; Loads a <vscale x 9 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 9 .b lanes is
; split with punpklo/punpkhi to guard two ld1sb loads; both halves are then
; written with predicated st1h to a two-vector stack area (addvl sp, sp, #-2)
; and reloaded with ldr into z0/z1 before returning.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 9 x i16> @sve_sextload_nxv9i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv9i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 9 x i8>, ptr %a
%c.sext = sext <vscale x 9 x i8> %c to <vscale x 9 x i16>
ret <vscale x 9 x i16> %c.sext
}
; Loads a <vscale x 10 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: the lane count comes from cntd ..., mul #5 and feeds a
; whilelo over .b lanes, split with punpklo/punpkhi to guard two ld1sb loads.
; The low half is stored whole to [sp]; the high half is unpacked twice with
; uunpklo and stored as .d elements at [sp, #4, mul vl]; z0/z1 are then
; reloaded from the two-vector stack area.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 10 x i16> @sve_sextload_nxv10i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv10i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #5
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 10 x i8>, ptr %a
%c.sext = sext <vscale x 10 x i8> %c to <vscale x 10 x i16>
ret <vscale x 10 x i16> %c.sext
}
; Loads a <vscale x 11 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 11 .b lanes is
; split with punpklo/punpkhi to guard two ld1sb loads; both halves are then
; written with predicated st1h to a two-vector stack area (addvl sp, sp, #-2)
; and reloaded with ldr into z0/z1 before returning.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 11 x i16> @sve_sextload_nxv11i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv11i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 11 x i8>, ptr %a
%c.sext = sext <vscale x 11 x i8> %c to <vscale x 11 x i16>
ret <vscale x 11 x i16> %c.sext
}
; Loads a <vscale x 12 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: the lane count comes from cntw ..., mul #3 and feeds a
; whilelo over .b lanes, split with punpklo/punpkhi to guard two ld1sb loads.
; The low half is stored whole to [sp]; the high half is unpacked once with
; uunpklo and stored as .s elements at [sp, #2, mul vl]; z0/z1 are then
; reloaded from the two-vector stack area.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 12 x i16> @sve_sextload_nxv12i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntw x8, all, mul #3
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 12 x i8>, ptr %a
%c.sext = sext <vscale x 12 x i8> %c to <vscale x 12 x i16>
ret <vscale x 12 x i16> %c.sext
}
; Loads a <vscale x 13 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 13 .b lanes is
; split with punpklo/punpkhi to guard two ld1sb loads; both halves are then
; written with predicated st1h to a two-vector stack area (addvl sp, sp, #-2)
; and reloaded with ldr into z0/z1 before returning.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 13 x i16> @sve_sextload_nxv13i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv13i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 13 x i8>, ptr %a
%c.sext = sext <vscale x 13 x i8> %c to <vscale x 13 x i16>
ret <vscale x 13 x i16> %c.sext
}
; Loads a <vscale x 14 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: the lane count comes from cntd ..., mul #7 and feeds a
; whilelo over .b lanes, split with punpklo/punpkhi to guard two ld1sb loads.
; The low half is stored whole to [sp]; the high half is split with
; uunpklo/uunpkhi and stored as .s elements at [sp, #2, mul vl] and as .d
; elements at [sp, #6, mul vl]; z0/z1 are then reloaded from the stack area.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 14 x i16> @sve_sextload_nxv14i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv14i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #7
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1sb { z2.h }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z2, [sp]
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: st1h { z1.d }, p0, [sp, #6, mul vl]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 14 x i8>, ptr %a
%c.sext = sext <vscale x 14 x i8> %c to <vscale x 14 x i16>
ret <vscale x 14 x i16> %c.sext
}
; Loads a <vscale x 15 x i8> from %a and returns it sign-extended to i16
; elements; %b is unused.
; Expected lowering: a whilelo predicate over (rdvl #1 >> 4) * 15 .b lanes is
; split with punpklo/punpkhi to guard two ld1sb loads; both halves are then
; written with predicated st1h to a two-vector stack area (addvl sp, sp, #-2)
; and reloaded with ldr into z0/z1 before returning.
; NOTE(review): the stack round-trip looks avoidable - confirm whether it is
; intended output or a known codegen limitation.
define <vscale x 15 x i16> @sve_sextload_nxv15i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv15i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sb { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 15 x i8>, ptr %a
%c.sext = sext <vscale x 15 x i8> %c to <vscale x 15 x i16>
ret <vscale x 15 x i16> %c.sext
}
; Sign-extending load of <vscale x 16 x i8> from %a to <vscale x 16 x i16>; %b is unused.
; Expected output: two ptrue-predicated ld1sb loads, one per result register (z0, z1).
define <vscale x 16 x i16> @sve_sextload_nxv16i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1sb { z1.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 16 x i8>, ptr %a
%c.sext = sext <vscale x 16 x i8> %c to <vscale x 16 x i16>
ret <vscale x 16 x i16> %c.sext
}
; Sign-extending load of <vscale x 1 x i16> from %a to <vscale x 1 x i32>; %b is unused.
; Expected output: a single ld1sh governed by a whilelo predicate whose bound is
; derived from rdvl.
define <vscale x 1 x i32> @sve_sextload_nxv1i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i16>, ptr %a
%c.sext = sext <vscale x 1 x i16> %c to <vscale x 1 x i32>
ret <vscale x 1 x i32> %c.sext
}
; Sign-extending load of <vscale x 2 x i16> from %a to <vscale x 2 x i32>; %b is unused.
; Expected output: a single ptrue-predicated ld1sh into .d elements.
define <vscale x 2 x i32> @sve_sextload_nxv2i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i16>, ptr %a
%c.sext = sext <vscale x 2 x i16> %c to <vscale x 2 x i32>
ret <vscale x 2 x i32> %c.sext
}
; Sign-extending load of <vscale x 3 x i16> from %a to <vscale x 3 x i32>; %b is unused.
; Expected output: a single ld1sh into .s elements under a whilelo predicate whose
; bound is the rdvl-derived value multiplied by 3.
define <vscale x 3 x i32> @sve_sextload_nxv3i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv3i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i16>, ptr %a
%c.sext = sext <vscale x 3 x i16> %c to <vscale x 3 x i32>
ret <vscale x 3 x i32> %c.sext
}
; Sign-extending load of <vscale x 4 x i16> from %a to <vscale x 4 x i32>; %b is unused.
; Expected output: a single ptrue-predicated ld1sh into .s elements.
define <vscale x 4 x i32> @sve_sextload_nxv4i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i16>, ptr %a
%c.sext = sext <vscale x 4 x i16> %c to <vscale x 4 x i32>
ret <vscale x 4 x i32> %c.sext
}
; Sign-extending load of <vscale x 5 x i16> from %a to <vscale x 5 x i32>; %b is unused.
; Expected output: a whilelo predicate is split with punpklo/punpkhi for the two
; ld1sh loads, and the result is staged through two stack vectors before return.
define <vscale x 5 x i32> @sve_sextload_nxv5i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv5i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 5 x i16>, ptr %a
%c.sext = sext <vscale x 5 x i16> %c to <vscale x 5 x i32>
ret <vscale x 5 x i32> %c.sext
}
; Sign-extending load of <vscale x 6 x i16> from %a to <vscale x 6 x i32>; %b is unused.
; Expected output: the whilelo bound comes from cntd (mul #3); the predicate is split
; with punpklo/punpkhi for two ld1sh loads, the high part is unpacked with uunpklo,
; and the result is assembled on the stack before being reloaded into z0/z1.
define <vscale x 6 x i32> @sve_sextload_nxv6i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv6i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #3
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: st1w { z0.d }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 6 x i16>, ptr %a
%c.sext = sext <vscale x 6 x i16> %c to <vscale x 6 x i32>
ret <vscale x 6 x i32> %c.sext
}
; Sign-extending load of <vscale x 7 x i16> from %a to <vscale x 7 x i32>; %b is unused.
; Expected output: a whilelo predicate is split with punpklo/punpkhi for the two
; ld1sh loads, and the result is staged through two stack vectors before return.
define <vscale x 7 x i32> @sve_sextload_nxv7i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv7i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sh { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 7 x i16>, ptr %a
%c.sext = sext <vscale x 7 x i16> %c to <vscale x 7 x i32>
ret <vscale x 7 x i32> %c.sext
}
; Sign-extending load of <vscale x 8 x i16> from %a to <vscale x 8 x i32>; %b is unused.
; Expected output: two ptrue-predicated ld1sh loads, one per result register (z0, z1).
define <vscale x 8 x i32> @sve_sextload_nxv8i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i16>, ptr %a
%c.sext = sext <vscale x 8 x i16> %c to <vscale x 8 x i32>
ret <vscale x 8 x i32> %c.sext
}
; Sign-extending load of <vscale x 1 x i32> from %a to <vscale x 1 x i64>; %b is unused.
; Expected output: a single ld1sw governed by a whilelo predicate whose bound is
; derived from rdvl.
define <vscale x 1 x i64> @sve_sextload_nxv1i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i32>, ptr %a
%c.sext = sext <vscale x 1 x i32> %c to <vscale x 1 x i64>
ret <vscale x 1 x i64> %c.sext
}
; Sign-extending load of <vscale x 2 x i32> from %a to <vscale x 2 x i64>; %b is unused.
; Expected output: a single ptrue-predicated ld1sw into .d elements.
define <vscale x 2 x i64> @sve_sextload_nxv2i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i32>, ptr %a
%c.sext = sext <vscale x 2 x i32> %c to <vscale x 2 x i64>
ret <vscale x 2 x i64> %c.sext
}
; Sign-extending load of <vscale x 3 x i32> from %a to <vscale x 3 x i64>; %b is unused.
; Expected output: a whilelo predicate is split with punpklo/punpkhi for the two
; ld1sw loads, and the result is staged through two stack vectors before return.
define <vscale x 3 x i64> @sve_sextload_nxv3i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1sw { z0.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 3 x i32>, ptr %a
%c.sext = sext <vscale x 3 x i32> %c to <vscale x 3 x i64>
ret <vscale x 3 x i64> %c.sext
}
; Sign-extending load of <vscale x 4 x i32> from %a to <vscale x 4 x i64>; %b is unused.
; Expected output: two ptrue-predicated ld1sw loads, one per result register (z0, z1).
define <vscale x 4 x i64> @sve_sextload_nxv4i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_sextload_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i32>, ptr %a
%c.sext = sext <vscale x 4 x i32> %c to <vscale x 4 x i64>
ret <vscale x 4 x i64> %c.sext
}
; Zero-extending load of <vscale x 1 x i8> from %a to <vscale x 1 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 1 x i16> @sve_zextload_nxv1i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i8>, ptr %a
%c.zext = zext <vscale x 1 x i8> %c to <vscale x 1 x i16>
ret <vscale x 1 x i16> %c.zext
}
; Zero-extending load of <vscale x 2 x i8> from %a to <vscale x 2 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 2 x i16> @sve_zextload_nxv2i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i8>, ptr %a
%c.zext = zext <vscale x 2 x i8> %c to <vscale x 2 x i16>
ret <vscale x 2 x i16> %c.zext
}
; Zero-extending load of <vscale x 3 x i8> from %a to <vscale x 3 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 3 x i16> @sve_zextload_nxv3i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv3i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i8>, ptr %a
%c.zext = zext <vscale x 3 x i8> %c to <vscale x 3 x i16>
ret <vscale x 3 x i16> %c.zext
}
; Zero-extending load of <vscale x 4 x i8> from %a to <vscale x 4 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 4 x i16> @sve_zextload_nxv4i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i8>, ptr %a
%c.zext = zext <vscale x 4 x i8> %c to <vscale x 4 x i16>
ret <vscale x 4 x i16> %c.zext
}
; Zero-extending load of <vscale x 5 x i8> from %a to <vscale x 5 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 5 x i16> @sve_zextload_nxv5i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv5i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 5 x i8>, ptr %a
%c.zext = zext <vscale x 5 x i8> %c to <vscale x 5 x i16>
ret <vscale x 5 x i16> %c.zext
}
; Zero-extending load of <vscale x 6 x i8> from %a to <vscale x 6 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 6 x i16> @sve_zextload_nxv6i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv6i8:
; CHECK: // %bb.0:
; CHECK-NEXT: cntd x8, all, mul #3
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 6 x i8>, ptr %a
%c.zext = zext <vscale x 6 x i8> %c to <vscale x 6 x i16>
ret <vscale x 6 x i16> %c.zext
}
; Zero-extending load of <vscale x 7 x i8> from %a to <vscale x 7 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 7 x i16> @sve_zextload_nxv7i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv7i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 7 x i8>, ptr %a
%c.zext = zext <vscale x 7 x i8> %c to <vscale x 7 x i16>
ret <vscale x 7 x i16> %c.zext
}
; Zero-extending load of <vscale x 8 x i8> from %a to <vscale x 8 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 8 x i16> @sve_zextload_nxv8i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i8>, ptr %a
%c.zext = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
ret <vscale x 8 x i16> %c.zext
}
; Zero-extending load of <vscale x 9 x i8> from %a to <vscale x 9 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 9 x i16> @sve_zextload_nxv9i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv9i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 9 x i8>, ptr %a
%c.zext = zext <vscale x 9 x i8> %c to <vscale x 9 x i16>
ret <vscale x 9 x i16> %c.zext
}
; Zero-extending load of <vscale x 10 x i8> from %a to <vscale x 10 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 10 x i16> @sve_zextload_nxv10i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv10i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #5
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #4, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 10 x i8>, ptr %a
%c.zext = zext <vscale x 10 x i8> %c to <vscale x 10 x i16>
ret <vscale x 10 x i16> %c.zext
}
; Zero-extending load of <vscale x 11 x i8> from %a to <vscale x 11 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 11 x i16> @sve_zextload_nxv11i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv11i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #11 // =0xb
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 11 x i8>, ptr %a
%c.zext = zext <vscale x 11 x i8> %c to <vscale x 11 x i16>
ret <vscale x 11 x i16> %c.zext
}
; Zero-extending load of <vscale x 12 x i8> from %a to <vscale x 12 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 12 x i16> @sve_zextload_nxv12i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntw x8, all, mul #3
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 12 x i8>, ptr %a
%c.zext = zext <vscale x 12 x i8> %c to <vscale x 12 x i16>
ret <vscale x 12 x i16> %c.zext
}
; Zero-extending load of <vscale x 13 x i8> from %a to <vscale x 13 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 13 x i16> @sve_zextload_nxv13i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv13i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #13 // =0xd
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 13 x i8>, ptr %a
%c.zext = zext <vscale x 13 x i8> %c to <vscale x 13 x i16>
ret <vscale x 13 x i16> %c.zext
}
; Zero-extending load of <vscale x 14 x i8> from %a to <vscale x 14 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 14 x i16> @sve_zextload_nxv14i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv14i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #7
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z2.h }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z1.s, z0.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: str z2, [sp]
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: st1h { z0.s }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: st1h { z1.d }, p0, [sp, #6, mul vl]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 14 x i8>, ptr %a
%c.zext = zext <vscale x 14 x i8> %c to <vscale x 14 x i16>
ret <vscale x 14 x i16> %c.zext
}
; Zero-extending load of <vscale x 15 x i8> from %a to <vscale x 15 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 15 x i16> @sve_zextload_nxv15i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv15i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #15 // =0xf
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1b { z0.h }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.h }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 15 x i8>, ptr %a
%c.zext = zext <vscale x 15 x i8> %c to <vscale x 15 x i16>
ret <vscale x 15 x i16> %c.zext
}
; Zero-extending load of <vscale x 16 x i8> from %a to <vscale x 16 x i16>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1b);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 16 x i16> @sve_zextload_nxv16i8(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 16 x i8>, ptr %a
%c.zext = zext <vscale x 16 x i8> %c to <vscale x 16 x i16>
ret <vscale x 16 x i16> %c.zext
}
; Zero-extending load of <vscale x 1 x i16> from %a to <vscale x 1 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1h);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 1 x i32> @sve_zextload_nxv1i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i16>, ptr %a
%c.zext = zext <vscale x 1 x i16> %c to <vscale x 1 x i32>
ret <vscale x 1 x i32> %c.zext
}
; Zero-extending load of <vscale x 2 x i16> from %a to <vscale x 2 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1h);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 2 x i32> @sve_zextload_nxv2i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i16>, ptr %a
%c.zext = zext <vscale x 2 x i16> %c to <vscale x 2 x i32>
ret <vscale x 2 x i32> %c.zext
}
; Zero-extending load of <vscale x 3 x i16> from %a to <vscale x 3 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1h);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 3 x i32> @sve_zextload_nxv3i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv3i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 3 x i16>, ptr %a
%c.zext = zext <vscale x 3 x i16> %c to <vscale x 3 x i32>
ret <vscale x 3 x i32> %c.zext
}
; Zero-extending load of <vscale x 4 x i16> from %a to <vscale x 4 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1h);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 4 x i32> @sve_zextload_nxv4i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i16>, ptr %a
%c.zext = zext <vscale x 4 x i16> %c to <vscale x 4 x i32>
ret <vscale x 4 x i32> %c.zext
}
; Zero-extending load of <vscale x 5 x i16> from %a to <vscale x 5 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1h),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 5 x i32> @sve_zextload_nxv5i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv5i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #5 // =0x5
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 5 x i16>, ptr %a
%c.zext = zext <vscale x 5 x i16> %c to <vscale x 5 x i32>
ret <vscale x 5 x i32> %c.zext
}
; Zero-extending load of <vscale x 6 x i16> from %a to <vscale x 6 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1h),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 6 x i32> @sve_zextload_nxv6i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv6i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntd x8, all, mul #3
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: str z1, [sp]
; CHECK-NEXT: st1w { z0.d }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 6 x i16>, ptr %a
%c.zext = zext <vscale x 6 x i16> %c to <vscale x 6 x i32>
ret <vscale x 6 x i32> %c.zext
}
; Zero-extending load of <vscale x 7 x i16> from %a to <vscale x 7 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1h),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 7 x i32> @sve_zextload_nxv7i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv7i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #7 // =0x7
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.h, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 7 x i16>, ptr %a
%c.zext = zext <vscale x 7 x i16> %c to <vscale x 7 x i32>
ret <vscale x 7 x i32> %c.zext
}
; Zero-extending load of <vscale x 8 x i16> from %a to <vscale x 8 x i32>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1h);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 8 x i32> @sve_zextload_nxv8i16(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 8 x i16>, ptr %a
%c.zext = zext <vscale x 8 x i16> %c to <vscale x 8 x i32>
ret <vscale x 8 x i32> %c.zext
}
; Zero-extending load of <vscale x 1 x i32> from %a to <vscale x 1 x i64>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1w);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 1 x i64> @sve_zextload_nxv1i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: whilelo p0.d, xzr, x8
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 1 x i32>, ptr %a
%c.zext = zext <vscale x 1 x i32> %c to <vscale x 1 x i64>
ret <vscale x 1 x i64> %c.zext
}
; Zero-extending load of <vscale x 2 x i32> from %a to <vscale x 2 x i64>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending load (ld1w);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 2 x i64> @sve_zextload_nxv2i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
%c = load <vscale x 2 x i32>, ptr %a
%c.zext = zext <vscale x 2 x i32> %c to <vscale x 2 x i64>
ret <vscale x 2 x i64> %c.zext
}
; Zero-extending load of <vscale x 3 x i32> from %a to <vscale x 3 x i64>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1w),
; keeping the surrounding sequence unchanged; re-run
; utils/update_llc_test_checks.py to confirm.
define <vscale x 3 x i64> @sve_zextload_nxv3i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: whilelo p0.s, xzr, x8
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: ld1w { z0.d }, p1/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%c = load <vscale x 3 x i32>, ptr %a
%c.zext = zext <vscale x 3 x i32> %c to <vscale x 3 x i64>
ret <vscale x 3 x i64> %c.zext
}
; Zero-extending load of <vscale x 4 x i32> from %a to <vscale x 4 x i64>; %b is unused.
; NOTE(review): expected output was hand-edited for the zero-extending loads (ld1w);
; re-run utils/update_llc_test_checks.py to confirm.
define <vscale x 4 x i64> @sve_zextload_nxv4i32(ptr %a, ptr %b) {
; CHECK-LABEL: sve_zextload_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
%c = load <vscale x 4 x i32>, ptr %a
%c.zext = zext <vscale x 4 x i32> %c to <vscale x 4 x i64>
ret <vscale x 4 x i64> %c.zext
}