| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s |
| ; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s |
| |
| ; Tests for prolog sequences for stack probing, when using a 64KiB stack guard. |
| |
| ; 64k bytes is the largest frame we can probe in one go. |
| define void @static_65536(ptr %out) #0 { |
| ; CHECK-LABEL: static_65536: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 65536-byte local buffer and write its address to %out. |
| %v = alloca i8, i64 65536, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 64k+16 bytes, still needs just one probe. |
| define void @static_65552(ptr %out) #0 { |
| ; CHECK-LABEL: static_65552: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp], #-16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65568 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 65552-byte (64KiB + 16) local buffer and write its address to %out. |
| %v = alloca i8, i64 65552, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 64k+1024 bytes, the largest frame which needs just one probe. |
| define void @static_66560(ptr %out) #0 { |
| ; CHECK-LABEL: static_66560: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 66576 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 66560-byte (64KiB + 1024) local buffer and write its address to %out. |
| %v = alloca i8, i64 66560, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 64k+1024+16 bytes, the smallest frame which needs two probes. |
| define void @static_66576(ptr %out) #0 { |
| ; CHECK-LABEL: static_66576: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 66592 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 66576-byte (64KiB + 1024 + 16) local buffer and write its address to %out. |
| %v = alloca i8, i64 66576, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 2*64k+1024, the largest frame needing two probes. |
| define void @static_132096(ptr %out) #0 { |
| ; CHECK-LABEL: static_132096: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 131088 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 132112 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #32, lsl #12 // =131072 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 132096-byte (2*64KiB + 1024) local buffer and write its address to %out. |
| %v = alloca i8, i64 132096, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*64k-16, the largest frame probed without a loop. |
| define void @static_327664(ptr %out) #0 { |
| ; CHECK-LABEL: static_327664: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 65552 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 131088 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 196624 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 262160 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: sub sp, sp, #15, lsl #12 // =61440 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 323600 |
| ; CHECK-NEXT: sub sp, sp, #4080 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 327680 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #79, lsl #12 // =323584 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 4096 |
| ; CHECK-NEXT: add sp, sp, #4080 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 327664-byte (5*64KiB - 16) local buffer and write its address to %out. |
| %v = alloca i8, i64 327664, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*64k, smallest frame probed with a loop. |
| define void @static_327680(ptr %out) #0 { |
| ; CHECK-LABEL: static_327680: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 327696 |
| ; CHECK-NEXT: .LBB6_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB6_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 327680-byte (5*64KiB) local buffer and write its address to %out. |
| %v = alloca i8, i64 327680, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB |
| ; so has a reminder, but no extra probe. |
| define void @static_328704(ptr %out) #0 { |
| ; CHECK-LABEL: static_328704: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 327696 |
| ; CHECK-NEXT: .LBB7_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB7_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: sub sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 328720 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1040 |
| ; CHECK-NEXT: add sp, sp, #1024 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 328704-byte (5*64KiB + 1024) local buffer and write its address to %out. |
| %v = alloca i8, i64 328704, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; 5*64k+1040, large enough to use a loop, has a reminder and |
| ; an extra probe. |
| define void @static_328720(ptr %out) #0 { |
| ; CHECK-LABEL: static_328720: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa w9, 327696 |
| ; CHECK-NEXT: .LBB8_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.ne .LBB8_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: .cfi_def_cfa_register wsp |
| ; CHECK-NEXT: sub sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 328736 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1056 |
| ; CHECK-NEXT: add sp, sp, #1040 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 328720-byte (5*64KiB + 1040) local buffer and write its address to %out. |
| %v = alloca i8, i64 328720, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A small allocation, but with a very large alignment requirement. We do this |
| ; by moving SP far enough that a sufficiently-aligned block will exist |
| ; somewhere in the stack frame, so must probe the whole of that larger SP move. |
| define void @static_16_align_131072(ptr %out) #0 { |
| ; CHECK-LABEL: static_16_align_131072: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976 |
| ; CHECK-NEXT: sub x9, x9, #4080 |
| ; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000 |
| ; CHECK-NEXT: .LBB9_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: cmp sp, x9 |
| ; CHECK-NEXT: b.le .LBB9_3 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: b .LBB9_1 |
| ; CHECK-NEXT: .LBB9_3: // %entry |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: ldr xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 16-byte local buffer aligned to 131072 bytes and write its address to %out. |
| %v = alloca i8, i64 16, align 131072 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A small allocation, but with a very large alignment requirement which |
| ; is nevertheless small enough as to not need a loop. |
| define void @static_16_align_8192(ptr %out) #0 { |
| ; CHECK-LABEL: static_16_align_8192: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096 |
| ; CHECK-NEXT: sub x9, x9, #4080 |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffffe000 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 16-byte local buffer aligned to 8192 bytes and write its address to %out. |
| %v = alloca i8, i64 16, align 8192 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; A large allocation with a very large alignment requirement which |
| ; is nevertheless small enough as to not need a loop. |
| define void @static_32752_align_32k(ptr %out) #0 { |
| ; CHECK-LABEL: static_32752_align_32k: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: .cfi_def_cfa w29, 16 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: sub x9, sp, #7, lsl #12 // =28672 |
| ; CHECK-NEXT: sub x9, x9, #4080 |
| ; CHECK-NEXT: and sp, x9, #0xffffffffffff8000 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str x8, [x0] |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: .cfi_def_cfa wsp, 16 |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| ; CHECK-NEXT: .cfi_restore w30 |
| ; CHECK-NEXT: .cfi_restore w29 |
| ; CHECK-NEXT: ret |
| entry: |
| ; Reserve a 32752-byte local buffer aligned to 32768 bytes and write its address to %out. |
| %v = alloca i8, i64 32752, align 32768 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; Attribute group applied to every function in this file: uwtable(async), "probe-stack"="inline-asm", a "stack-probe-size" of 65536 bytes (64KiB), and "frame-pointer"="none". |
| attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" } |