| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 | FileCheck %s |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT |
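
; This test checks frame layouts produced with -aarch64-split-sve-objects,
; where predicate (PPR) locals are placed in a separate region from vector
; (ZPR) and FPR locals, with hazard padding (1024 bytes here, from
; -aarch64-streaming-hazard-size=1024) between the two regions. The layout
; comments before each function sketch the expected placement.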
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024 |
| |
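; Note on the "mul vl" offsets in the layout comments: PPR str/ldr immediates
; are scaled by the predicate size (2 x vscale bytes), so e.g. #15, mul vl is
; 30 x vscale bytes, while ZPR str/ldr immediates are scaled by the vector
; size (16 x vscale bytes), so e.g. #-2, mul vl is -32 x vscale bytes.
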
| ; <GPRs> |
| ; %ppr_local sp+2048+30*vscale (= #15, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding sp+2048+16*vscale |
| ; <hazard padding> sp+1024+16*vscale |
| ; %zpr_local sp+1024 |
| ; <hazard padding> |
| ; -> sp |
| define void @zpr_and_ppr_local(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: zpr_and_ppr_local: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #2048 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: add x8, sp, #2048 |
| ; CHECK-NEXT: str p0, [x8, #15, mul vl] |
| ; CHECK-NEXT: add x8, sp, #1024 |
| ; CHECK-NEXT: str z0, [x8] |
| ; CHECK-NEXT: add sp, sp, #2048 |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile <vscale x 16 x i8> %vector, ptr %zpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local_fp |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024 |
| |
| ; <GPRs> |
| ; -> fp |
| ; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding fp-16*vscale |
| ; <hazard padding> fp-1024-16*vscale |
| ; %zpr_local fp-1024-32*vscale (= #-2, mul vl for str/ldr ZPR) |
| ; <hazard padding> |
| ; -> sp |
| define void @zpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" "frame-pointer"="all" { |
| ; CHECK-LABEL: zpr_and_ppr_local_fp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #2048 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x8, x29, #1024 |
| ; CHECK-NEXT: str p0, [x29, #-1, mul vl] |
| ; CHECK-NEXT: str z0, [x8, #-2, mul vl] |
| ; CHECK-NEXT: add sp, sp, #2048 |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile <vscale x 16 x i8> %vector, ptr %zpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024 |
| |
| ; <GPRs> |
| ; %ppr_local sp+2064+14*vscale (= #7, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding sp+2064 |
| ; <hazard padding> sp+1040 |
| ; %fpr_local sp+1032 |
| ; 8 bytes of padding sp+1024 |
| ; <hazard padding> |
| ; -> sp |
| define void @fpr_and_ppr_local(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: fpr_and_ppr_local: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #2064 |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: add x8, sp, #2064 |
| ; CHECK-NEXT: str p0, [x8, #7, mul vl] |
| ; CHECK-NEXT: str d0, [sp, #1032] |
| ; CHECK-NEXT: add sp, sp, #2064 |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %fpr_local = alloca double |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile double %double, ptr %fpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local_fp |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024 |
| |
| ; <GPRs> |
| ; -> fp |
| ; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding |
| ; <hazard padding> |
| ; %fpr_local sp+1032 |
| ; 8 bytes of padding sp+1024 |
| ; <hazard padding> |
| ; -> sp |
| define void @fpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" "frame-pointer"="all" { |
| ; CHECK-LABEL: fpr_and_ppr_local_fp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #2064 |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: str p0, [x29, #-1, mul vl] |
| ; CHECK-NEXT: str d0, [sp, #1032] |
| ; CHECK-NEXT: add sp, sp, #2064 |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %fpr_local = alloca double |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile double %double, ptr %fpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8 |
| |
| ; <CS GPRs> |
| ; %ppr_local sp+2064+30*vscale (= #15, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding |
| ; <hazard padding> sp+1040+16*vscale |
| ; <fpr callee save: z8> sp+1040 |
| ; <hazard padding> sp+16 |
| ; %gpr_local sp+8 |
| ; 8 bytes of padding |
| ; -> sp |
| define void @gpr_and_ppr_local(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: gpr_and_ppr_local: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2080 + 16 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040 |
| ; CHECK-NEXT: add x8, sp, #2064 |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: str p0, [x8, #15, mul vl] |
| ; CHECK-NEXT: str x0, [sp, #8] |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed |
| %ppr_local = alloca <vscale x 16 x i1> |
| %gpr_local = alloca i64 |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile i64 %int, ptr %gpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local_fp |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8 |
| |
| ; <CS GPRs> |
| ; -> fp |
| ; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR) |
| ; 14 x vscale bytes of padding |
| ; <hazard padding> |
| ; <fpr callee save: z8> |
| ; <hazard padding> |
| ; %gpr_local sp+8 |
| ; 8 bytes of padding |
| ; -> sp |
| define void @gpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" "frame-pointer"="all" { |
| ; CHECK-LABEL: gpr_and_ppr_local_fp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040 |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: str p0, [x29, #-1, mul vl] |
| ; CHECK-NEXT: str x0, [sp, #8] |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed |
| %ppr_local = alloca <vscale x 16 x i1> |
| %gpr_local = alloca i64 |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile i64 %int, ptr %gpr_local |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-320 x vscale], Type: Variable, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-320 x vscale], Type: Variable, Align: 16, Size: 1024 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2088-320 x vscale], Type: Variable, Align: 8, Size: 8 |
| |
| ; <CS GPRs> |
| ; <CS PPRs> |
| ; %ppr_local sp+2080+286*vscale (addvl #17, addpl #7) |
| ; 14 * vscale bytes of padding sp+2080+272*vscale |
| ; <hazard padding> sp+1056+272*vscale |
| ; <CS ZPRs> sp+1056+16*vscale |
| ; %zpr_local sp+1056 |
| ; %fpr_local sp+1048 |
| ; 8 bytes of padding sp+1040 |
| ; <hazard padding> sp+16 |
| ; %gpr_local sp+8 |
| ; 8 bytes of padding sp |
| ; -> sp |
| define void @all_stack_areas(<vscale x 16 x i1> %pred, double %fp) { |
| ; CHECK-LABEL: all_stack_areas: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #-17 |
| ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1056 |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0xa0, 0x01, 0x1e, 0x22 // sp + 2096 + 160 * VG |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1040 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1040 |
| ; CHECK-NEXT: add x0, sp, #2080 |
| ; CHECK-NEXT: add x8, sp, #2080 |
| ; CHECK-NEXT: add x1, sp, #1056 |
| ; CHECK-NEXT: addvl x0, x0, #17 |
| ; CHECK-NEXT: add x2, sp, #1048 |
| ; CHECK-NEXT: add x3, sp, #8 |
| ; CHECK-NEXT: addpl x0, x0, #7 |
| ; CHECK-NEXT: str d0, [sp, #1048] |
| ; CHECK-NEXT: str p0, [x8, #143, mul vl] |
| ; CHECK-NEXT: bl foo |
| ; CHECK-NEXT: add sp, sp, #1056 |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #17 |
| ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| %fpr_local = alloca double |
  ; Needed to sort %fpr_local into the FPR region
| store double %fp, ptr %fpr_local |
  ; Needed to sort %ppr_local into the PPR region
| store <vscale x 16 x i1> %pred, ptr %ppr_local |
| %gpr_local = alloca i64 |
| call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local) |
| ret void |
| } |
| declare void @foo(ptr, ptr, ptr, ptr) |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas_fp |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1064-320 x vscale], Type: Variable, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2096-320 x vscale], Type: Variable, Align: 16, Size: 1024 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2104-320 x vscale], Type: Variable, Align: 8, Size: 8 |
| |
| ; <CS GPRs> |
| ; -> fp |
| ; <CS PPRs> fp-32*vscale |
| ; %ppr_local fp-34*vscale (addpl #-17) |
| ; 14 * vscale bytes of padding fp-48*vscale |
| ; <hazard padding> fp-1024-48*vscale |
| ; <CS ZPRs> fp-1024-304*vscale |
; %zpr_local           fp-1024-320*vscale (addvl #-20)
| ; %fpr_local sp+1048 |
| ; 8 bytes of padding sp+1040 |
| ; <hazard padding> sp+16 |
| ; %gpr_local sp+8 |
| ; 8 bytes of padding sp |
| ; -> sp |
| define void @all_stack_areas_fp(<vscale x 16 x i1> %pred, double %fp) "frame-pointer"="all" { |
| ; CHECK-LABEL: all_stack_areas_fp: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #-17 |
| ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1056 |
| ; CHECK-NEXT: addvl sp, sp, #-1 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 32 |
| ; CHECK-NEXT: .cfi_offset w28, -16 |
| ; CHECK-NEXT: .cfi_offset w30, -24 |
| ; CHECK-NEXT: .cfi_offset w29, -32 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1056 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1056 |
| ; CHECK-NEXT: sub x1, x29, #1024 |
| ; CHECK-NEXT: addpl x0, x29, #-17 |
| ; CHECK-NEXT: add x2, sp, #1048 |
| ; CHECK-NEXT: addvl x1, x1, #-20 |
| ; CHECK-NEXT: add x3, sp, #8 |
| ; CHECK-NEXT: str d0, [sp, #1048] |
| ; CHECK-NEXT: str p0, [x29, #-17, mul vl] |
| ; CHECK-NEXT: bl foo |
| ; CHECK-NEXT: add sp, sp, #1056 |
| ; CHECK-NEXT: addvl sp, sp, #1 |
| ; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #17 |
| ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| %fpr_local = alloca double |
  ; Needed to sort %fpr_local into the FPR region
| store double %fp, ptr %fpr_local |
  ; Needed to sort %ppr_local into the PPR region
| store <vscale x 16 x i1> %pred, ptr %ppr_local |
| %gpr_local = alloca i64 |
| call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local) |
| ret void |
| } |
| |
| ; CHECK-FRAMELAYOUT-LABEL: Function: svecc_call |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48], Type: Spill, Align: 16, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-56], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64], Type: Spill, Align: 8, Size: 8 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-48 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16 |
| ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2112-288 x vscale], Type: Variable, Align: 16, Size: 1024 |
| |
| define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: svecc_call: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: cntd x9 |
| ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 64 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w26, -16 |
| ; CHECK-NEXT: .cfi_offset w27, -24 |
| ; CHECK-NEXT: .cfi_offset w28, -32 |
| ; CHECK-NEXT: .cfi_offset vg, -48 |
| ; CHECK-NEXT: .cfi_offset w30, -56 |
| ; CHECK-NEXT: .cfi_offset w29, -64 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #-16 |
| ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088 |
| ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088 |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: bl __arm_sme_state |
| ; CHECK-NEXT: mov x19, x0 |
| ; CHECK-NEXT: //APP |
| ; CHECK-NEXT: //NO_APP |
| ; CHECK-NEXT: tbz w19, #0, .LBB8_2 |
| ; CHECK-NEXT: // %bb.1: // %entry |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: .LBB8_2: // %entry |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: mov w1, #45 // =0x2d |
| ; CHECK-NEXT: mov w2, #37 // =0x25 |
| ; CHECK-NEXT: bl memset |
| ; CHECK-NEXT: tbz w19, #0, .LBB8_4 |
| ; CHECK-NEXT: // %bb.3: // %entry |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: .LBB8_4: // %entry |
| ; CHECK-NEXT: mov w0, #22647 // =0x5877 |
| ; CHECK-NEXT: movk w0, #59491, lsl #16 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: addvl sp, sp, #16 |
| ; CHECK-NEXT: .cfi_restore z8 |
| ; CHECK-NEXT: .cfi_restore z9 |
| ; CHECK-NEXT: .cfi_restore z10 |
| ; CHECK-NEXT: .cfi_restore z11 |
| ; CHECK-NEXT: .cfi_restore z12 |
| ; CHECK-NEXT: .cfi_restore z13 |
| ; CHECK-NEXT: .cfi_restore z14 |
| ; CHECK-NEXT: .cfi_restore z15 |
| ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 64 |
| ; CHECK-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w19 |
| ; CHECK-NEXT: .cfi_restore w26 |
| ; CHECK-NEXT: .cfi_restore w27 |
| ; CHECK-NEXT: .cfi_restore w28 |
| ; CHECK-NEXT: .cfi_restore vg |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 |
| %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37) |
| ret i32 -396142473 |
| } |
| declare ptr @memset(ptr, i32, i32) |
| |
| ; FIXME: aarch64-split-sve-objects is currently not supported in this function |
; as it requires stack realignment (for the 32-byte aligned alloca).
| ; GPR CSRs |
| ; <hazard padding> |
| ; FPR CSRs |
; <hazard padding>
| ; <SVE locals (PPRs and ZPRs)> <--- hazard between PPRs and ZPRs here! |
| ; <realignment padding> |
| ; -> sp |
| define void @zpr_and_ppr_local_realignment(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: zpr_and_ppr_local_realignment: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: sub x9, sp, #1040 |
| ; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill |
| ; CHECK-NEXT: add x29, sp, #1024 |
| ; CHECK-NEXT: addvl x9, x9, #-2 |
| ; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x8, x29, #1024 |
| ; CHECK-NEXT: str p0, [x8, #-1, mul vl] |
| ; CHECK-NEXT: str z0, [x8, #-2, mul vl] |
| ; CHECK-NEXT: str x0, [sp] |
| ; CHECK-NEXT: sub sp, x29, #1024 |
| ; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: ret |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| %gpr_local = alloca i64, align 32 |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile <vscale x 16 x i8> %vector, ptr %zpr_local |
| store volatile i64 %gpr, ptr %gpr_local |
| ret void |
| } |
| |
| define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) |
| ; CHECK-LABEL: zpr_and_ppr_local_stack_probing: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #2848 |
| ; CHECK-NEXT: addvl sp, sp, #-2 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: add x8, sp, #2848 |
| ; CHECK-NEXT: str p0, [x8, #15, mul vl] |
| ; CHECK-NEXT: add x8, sp, #1824 |
| ; CHECK-NEXT: str z0, [x8] |
| ; CHECK-NEXT: str x0, [sp] |
| ; CHECK-NEXT: add sp, sp, #2848 |
| ; CHECK-NEXT: addvl sp, sp, #2 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible" |
| { |
| %ppr_local = alloca <vscale x 16 x i1> |
| %zpr_local = alloca <vscale x 16 x i8> |
| %gpr_local = alloca i64, i64 100, align 8 |
| store volatile <vscale x 16 x i1> %pred, ptr %ppr_local |
| store volatile <vscale x 16 x i8> %vector, ptr %zpr_local |
| store volatile i64 %gpr, ptr %gpr_local |
| ret void |
| } |