# Source blob: bb7ffb47d8dfef452702cd13bfda06642ac7926a
# RUN: llc -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -o - | FileCheck %s --check-prefix=ASM
# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -start-before=prologepilog %s -filetype=obj -o %t
# RUN: llvm-objdump --dwarf=frames %t | FileCheck %s --check-prefix=UNWINDINFO
# RUN: rm -rf %t
#
# Test allocation and deallocation of SVE objects on the stack with
# split-sve-objects (and hazard padding) enabled. This also tests using a
# combination of scalable and non-scalable offsets to access the SVE objects on
# the stack.
#
# With split-sve-objects (which implies hazard padding) the SVE area is split
# into PPR and ZPR areas with (fixed-size) hazard padding between them. The PPR
# area holds all scalable predicate callee saves and locals, and the ZPR area
# holds all scalable vector callee saves and locals. Additionally, any FPR
# callee save is promoted to a ZPR callee save (to avoid needing additional
# hazard padding in the callee save area).
#
# +-------------+
# | stack arg |
# +-------------+ <- SP before call
# | Callee Saves|
# | Frame record| (if available)
# |-------------| <- FP (if available)
# | PPR area |
# |-------------|
# |/////////////| hazard padding
# |-------------|
# | ZPR area |
# +-------------+
# | : |
# | Stack objs |
# | : |
# +-------------+ <- SP after call and frame-setup
#
--- |
  ; Placeholder IR: each definition only supplies a symbol (with the uwtable
  ; attribute) for the MIR function of the same name later in this file.
  define void @test_allocate_split_sve() uwtable {
  entry:
    unreachable
  }

  define void @test_allocate_split_sve_realigned() uwtable {
  entry:
    unreachable
  }

  define void @test_address_split_sve() uwtable {
  entry:
    unreachable
  }

  define void @test_address_split_sve_fp() uwtable {
  entry:
    unreachable
  }

  ; The only function here that uses the aarch64_sve_vector_pcs calling
  ; convention.
  define aarch64_sve_vector_pcs void @save_restore_ppr_zpr() uwtable {
  entry:
    unreachable
  }
...
---
# +----------+
# |scratchreg|  // x29 is used as scratch reg.
# |----------|
# | %stack.1 |  // scalable predicate of n * 12 bytes, aligned to 16 bytes
# |          |  // to be materialized with 1*ADDVL (<=> n * 16 bytes)
# |----------|
# |//////////|  // hazard padding (1024 bytes) -- part of PPR locals area
# |//////////|  // Note: This is currently not included in the "stackSize"
# +----------+
# | %stack.0 |  // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
# |          |  // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
# +----------+
# |//////////|  // hazard padding (1024 bytes)
# |----------|
# | %stack.2 |  // not scalable
# +----------+ <- SP
# CHECK-LABEL: name: test_allocate_split_sve
# CHECK: stackSize: 1056
# CHECK: bb.0.entry:
# CHECK: liveins: $z0, $p0, $fp
# CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.4)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
#
# CHECK-NEXT: $x8 = ADDXri $sp, 1040, 0
# CHECK-NEXT: $x8 = ADDPL_XXI $x8, 7, implicit $vg
# CHECK-NEXT: STR_ZXI $z0, killed $x8, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: $x8 = ADDXri $sp, 2064, 0
# CHECK-NEXT: STR_PXI $p0, killed $x8, 18 :: (store (<vscale x 1 x s16>) into %stack.1)
#
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.4)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR
# ASM-LABEL: test_allocate_split_sve:
# ASM: str x29, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM-NEXT: sub sp, sp, #2064
# ASM-NEXT: .cfi_def_cfa_offset 2080
# ASM-NEXT: addvl sp, sp, #-3
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
#
# ASM: add sp, sp, #2064
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
# ASM-NEXT: addvl sp, sp, #3
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldr x29, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w29
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_def_cfa_offset: +2080
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
# Function under test: two scalable-vector stack objects (%stack.0 is written
# with STR_ZXI, %stack.1 with STR_PXI) plus one fixed-size object (%stack.2).
name:            test_allocate_split_sve
stack:
  - { id: 0, stack-id: scalable-vector, size: 18, alignment:  2 }
  - { id: 1, stack-id: scalable-vector, size: 12, alignment:  2 }
  - { id: 2, stack-id: default,         size: 16, alignment:  8 }
body:             |
  bb.0.entry:
    liveins: $z0, $p0

    STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
    STR_PXI $p0, %stack.1, 0 :: (store (<vscale x 1 x s16>) into %stack.1)
    RET_ReallyLR
...
---
# Stack realignment is not supported with split-sve-objects, so we fall back to
# the default hazard padding implementation. This does not prevent hazards
# between ZPRs and PPRs (TODO: support this case).
#
# NOTE(review): the checks for this function address %stack.1 directly off $fp
# and %stack.0 at $fp - 1024, which looks like the split layout rather than the
# one drawn here -- confirm this comment and diagram are still current.
#
# +----------+
# |  lr, fp  |  // frame record
# |----------|
# |//////////|  // hazard padding (1024 bytes)
# |----------|
# | %stack.1 |  // scalable predicate of n * 12 bytes, aligned to 16 bytes
# |          |  // to be materialized with 1*ADDVL (<=> n * 16 bytes)
# +----------+
# | %stack.0 |  // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
# |          |  // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
# +----------+
# |//////////|  // hazard padding (1024 bytes)
# |----------|
# | %stack.2 |  // not scalable
# +----------+ <- SP
# Function under test: same scalable objects as test_allocate_split_sve, but
# %stack.2 requests 32-byte alignment. The expected prologue below sets up a
# frame pointer and realigns SP with an ANDXri.
name:            test_allocate_split_sve_realigned
stack:
  - { id: 0, stack-id: scalable-vector, size: 18, alignment:  2 }
  - { id: 1, stack-id: scalable-vector, size: 12, alignment:  2 }
  - { id: 2, stack-id: default,         size: 16, alignment: 32 }
body:             |
  bb.0.entry:
    liveins: $z0, $p0

    STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
    STR_PXI $p0, %stack.1, 0 :: (store (<vscale x 1 x s16>) into %stack.1)
    RET_ReallyLR

# CHECK-LABEL: name: test_allocate_split_sve_realigned
# CHECK: stackSize: 1056

# CHECK: bb.0.entry:
# CHECK: liveins: $z0, $p0, $lr
# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
#
# The new SP is computed in a scratch register before being realigned. The
# ADDVL has to consume the register the SUB just defined, so both lines use the
# captured register instead of a hard-coded one.
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 2064, 0
# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -3, implicit $vg
# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]], 7930
#
# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
# CHECK-NEXT: $x8 = ADDPL_XXI $x8, -1, implicit $vg
# CHECK-NEXT: STR_ZXI $z0, killed $x8, -2 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: STR_PXI $p0, $fp, -6 :: (store (<vscale x 1 x s16>) into %stack.1)
#
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR

# The trailing colon anchors the label on the function's own label line, as the
# other colon-terminated labels in this file do.
# ASM-LABEL: test_allocate_split_sve_realigned:
# ASM: stp x29, x30, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: mov x29, sp
# ASM-NEXT: .cfi_def_cfa w29, 16
# ASM-NEXT: .cfi_offset w30, -8
# ASM-NEXT: .cfi_offset w29, -16
#
# ASM: mov sp, x29
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldp x29, x30, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w30
# ASM-NEXT: .cfi_restore w29

# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg30
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
...
---
# +----------+
# |scratchreg| // x29 is used as scratch reg.
# +----------+
# | %stack.2 | // scalable predicate @ SP + 2064b + 46 scalable bytes
# |----------|
# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
# |//////////| // Note: This is currently not included in the "stackSize"
# |----------|
# | %stack.0 | // scalable vector @ SP + 1040b + 16 scalable bytes
# | %stack.1 | // scalable vector @ SP + 1040b
# +----------+
# |//////////| // hazard padding (1024 bytes)
# |----------|
# | %stack.3 | // not scalable
# +----------+ <- SP
# CHECK-LABEL: name: test_address_split_sve
# CHECK: stackSize: 1056
# CHECK: bb.0.entry:
# CHECK-NEXT: liveins:
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
#
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 1
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0
# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 0
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 2064, 0
# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 23
#
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR
# ASM-LABEL: test_address_split_sve:
# ASM: str x29, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM-NEXT: sub sp, sp, #2064
# ASM-NEXT: .cfi_def_cfa_offset 2080
# ASM-NEXT: addvl sp, sp, #-3
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
#
# ASM: add sp, sp, #2064
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
# ASM-NEXT: addvl sp, sp, #3
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldr x29, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w29
# ASM-NEXT: ret
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_def_cfa_offset: +2080
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
# Function under test: two scalable objects written with STR_ZXI (%stack.0,
# %stack.1), one written with STR_PXI (%stack.2), and one fixed-size object.
name:            test_address_split_sve
frameInfo:
  maxAlignment:  16
stack:
  - { id: 0, stack-id: scalable-vector, size: 16, alignment: 8 }
  - { id: 1, stack-id: scalable-vector, size: 16, alignment: 8 }
  - { id: 2, stack-id: scalable-vector, size:  2, alignment: 2 }
  - { id: 3, stack-id: default,         size: 16, alignment: 8 }
body:             |
  bb.0.entry:
    liveins: $z0, $z1, $p0

    STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
    STR_ZXI $z1, %stack.1, 0 :: (store (<vscale x 1 x s128>) into %stack.1)
    STR_PXI $p0, %stack.2, 0 :: (store (<vscale x 1 x s16>) into %stack.2)
    RET_ReallyLR
...
---
# +----------+
# | lr, fp | // frame record
# +----------+ <- FP
# | %stack.2 | // scalable predicate @ FP - 2 scalable bytes
# |----------|
# |//////////| // hazard padding (1024 bytes) -- part of PPR locals area
# |//////////| // Note: This is currently not included in the "stackSize"
# |----------|
# | %stack.0 | // scalable vector @ FP - 1024b - 32 scalable bytes
# | %stack.1 | // scalable vector @ FP - 1024b - 48 scalable bytes
# +----------+
# |//////////| // hazard padding (1024 bytes)
# |----------|
# | %stack.3 | // not scalable
# +----------+ <- SP
# CHECK-LABEL: name: test_address_split_sve_fp
# CHECK: stackSize: 1056
#
# CHECK: bb.0.entry:
# CHECK-NEXT: liveins:
# CHECK-NEXT: {{ $}}
# CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.6), (store (s64) into %stack.5)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg
#
# CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], -2
# CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0
# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], -3
# CHECK-NEXT: STR_PXI $p0, $fp, -1
#
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.6), (load (s64) from %stack.5)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR
# ASM-LABEL: test_address_split_sve_fp:
# ASM: stp x29, x30, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: mov x29, sp
# ASM-NEXT: .cfi_def_cfa w29, 16
# ASM-NEXT: .cfi_offset w30, -8
# ASM-NEXT: .cfi_offset w29, -16
# ASM-NEXT: sub sp, sp, #2064
# ASM-NEXT: addvl sp, sp, #-3
#
# ASM: add sp, sp, #2064
# ASM-NEXT: addvl sp, sp, #3
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldp x29, x30, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w30
# ASM-NEXT: .cfi_restore w29
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg30
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
# Function under test: same objects and stores as test_address_split_sve, with
# isFrameAddressTaken set. The expected output above addresses the scalable
# objects relative to $fp.
name:            test_address_split_sve_fp
frameInfo:
  maxAlignment:  16
  isFrameAddressTaken: true
stack:
  - { id: 0, stack-id: scalable-vector, size: 16, alignment: 8 }
  - { id: 1, stack-id: scalable-vector, size: 16, alignment: 8 }
  - { id: 2, stack-id: scalable-vector, size:  2, alignment: 2 }
  - { id: 3, stack-id: default,         size: 16, alignment: 8 }
body:             |
  bb.0.entry:
    liveins: $z0, $z1, $p0

    STR_ZXI $z0, %stack.0, 0 :: (store (<vscale x 1 x s128>) into %stack.0)
    STR_ZXI $z1, %stack.1, 0 :: (store (<vscale x 1 x s128>) into %stack.1)
    STR_PXI $p0, %stack.2, 0 :: (store (<vscale x 1 x s16>) into %stack.2)
    RET_ReallyLR
...
---
# CHECK-LABEL: name: save_restore_ppr_zpr
# CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.8)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
# CHECK-NEXT: frame-setup STR_PXI killed $p6, $sp, 5 :: (store (s16) into %stack.7)
# CHECK-NEXT: frame-setup STR_PXI killed $p5, $sp, 6 :: (store (s16) into %stack.6)
# CHECK-NEXT: frame-setup STR_PXI killed $p4, $sp, 7 :: (store (s16) into %stack.5)
#
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
#
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
# CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 :: (store (s128) into %stack.4)
# CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 :: (store (s128) into %stack.3)
# CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 2 :: (store (s128) into %stack.2)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1056, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
#
#
# CHECK: $sp = frame-destroy ADDXri $sp, 1056, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
# CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.2)
#
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22
#
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z8
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z9
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $z10
# CHECK-NEXT: $p6 = frame-destroy LDR_PXI $sp, 5 :: (load (s16) from %stack.7)
# CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 6 :: (load (s16) from %stack.6)
# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 7 :: (load (s16) from %stack.5)
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.8)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR
# ASM-LABEL: save_restore_ppr_zpr:
# ASM: str x29, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM-NEXT: addvl sp, sp, #-1
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
# ASM-NEXT: str p6, [sp, #5, mul vl]
# ASM-NEXT: str p5, [sp, #6, mul vl]
# ASM-NEXT: str p4, [sp, #7, mul vl]
# ASM-NEXT: sub sp, sp, #1024
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
# ASM-NEXT: addvl sp, sp, #-3
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
# ASM-NEXT: str z10, [sp]
# ASM-NEXT: str z9, [sp, #1, mul vl]
# ASM-NEXT: str z8, [sp, #2, mul vl]
# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1040
# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1040
# ASM-NEXT: sub sp, sp, #1056
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 2096 + 32 * VG
#
# ASM: add sp, sp, #1056
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
# ASM-NEXT: ldr z10, [sp]
# ASM-NEXT: ldr z9, [sp, #1, mul vl]
# ASM-NEXT: ldr z8, [sp, #2, mul vl]
# ASM-NEXT: add sp, sp, #1024
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG
# ASM-NEXT: addvl sp, sp, #3
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
# ASM-NEXT: .cfi_restore z8
# ASM-NEXT: .cfi_restore z9
# ASM-NEXT: .cfi_restore z10
# ASM-NEXT: ldr p6, [sp, #5, mul vl]
# ASM-NEXT: ldr p5, [sp, #6, mul vl]
# ASM-NEXT: ldr p4, [sp, #7, mul vl]
# ASM-NEXT: addvl sp, sp, #1
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldr x29, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w29
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_bregx 0x2e +0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
# UNWINDINFO: DW_CFA_expression: reg73 DW_OP_bregx 0x2e +0, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
# UNWINDINFO: DW_CFA_expression: reg74 DW_OP_bregx 0x2e +0, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -1040, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2096, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_consts +32, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg104
# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg105
# UNWINDINFO-NEXT: DW_CFA_restore_extended: reg106
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
# Function under test: declares one fixed-size 32-byte object and defines
# p4-p6 and z8-z10 via IMPLICIT_DEF. The expected output above spills and
# reloads exactly those registers.
name:            save_restore_ppr_zpr
stack:
  - { id: 0, stack-id: default, size: 32, alignment: 16 }
body:             |
  bb.0.entry:

    $p4  = IMPLICIT_DEF
    $p5  = IMPLICIT_DEF
    $p6  = IMPLICIT_DEF
    $z8  = IMPLICIT_DEF
    $z9  = IMPLICIT_DEF
    $z10 = IMPLICIT_DEF

    RET_ReallyLR