; blob: 2f15e317a7f58b224a081e130b0e4c63a6911e7b [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
; RUN: llc -mtriple aarch64-none-eabi < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s
; Tests for prolog sequences for stack probing, when using a 64KiB stack guard.
; 64k bytes is the largest frame we can probe in one go.
define void @static_65536(ptr %out) #0 {
; CHECK-LABEL: static_65536:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 65536-byte local: the expected prologue above moves SP down by 65536 in
  ; a single step and follows it with one probing store to [sp].
  %v = alloca i8, i64 65536, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 64k+16 bytes, still needs just one probe.
define void @static_65552(ptr %out) #0 {
; CHECK-LABEL: static_65552:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp], #-16
; CHECK-NEXT: .cfi_def_cfa_offset 65568
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 65552-byte local (65536 + 16): the expected prologue above probes once
  ; after the 65536-byte step; the trailing 16-byte adjustment appears as the
  ; post-index writeback of that same probing store, with no second probe.
  %v = alloca i8, i64 65552, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 64k+1024 bytes, the largest frame which needs just one probe.
define void @static_66560(ptr %out) #0 {
; CHECK-LABEL: static_66560:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 66576
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 66560-byte local (65536 + 1024): the expected prologue above probes
  ; after the 65536-byte step, then drops SP a further 1024 bytes without
  ; issuing another probing store.
  %v = alloca i8, i64 66560, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 64k+1024+16 bytes, the smallest frame which needs two probes.
define void @static_66576(ptr %out) #0 {
; CHECK-LABEL: static_66576:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 66592
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 66576-byte local (65536 + 1040): the expected prologue above probes
  ; after the 65536-byte step and again after the remaining 1040-byte step,
  ; i.e. two probing stores in total.
  %v = alloca i8, i64 66576, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 2*64k+1024, the largest frame needing two probes.
define void @static_132096(ptr %out) #0 {
; CHECK-LABEL: static_132096:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 131088
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 132112
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #32, lsl #12 // =131072
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 132096-byte local (2 * 65536 + 1024): the expected prologue above
  ; performs two unrolled 65536-byte steps, each followed by a probing store,
  ; then a final 1024-byte step with no probe.
  %v = alloca i8, i64 132096, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 5*64k-16, the largest frame probed without a loop.
define void @static_327664(ptr %out) #0 {
; CHECK-LABEL: static_327664:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 65552
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 131088
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 196624
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: .cfi_def_cfa_offset 262160
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #15, lsl #12 // =61440
; CHECK-NEXT: .cfi_def_cfa_offset 323600
; CHECK-NEXT: sub sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 327680
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #79, lsl #12 // =323584
; CHECK-NEXT: .cfi_def_cfa_offset 4096
; CHECK-NEXT: add sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 327664-byte local (5 * 65536 - 16): the expected prologue above is fully
  ; unrolled: four 65536-byte steps each followed by a probing store, then a
  ; 61440 + 4080 = 65520-byte step followed by a fifth probing store.
  %v = alloca i8, i64 327664, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 5*64k, smallest frame probed with a loop.
define void @static_327680(ptr %out) #0 {
; CHECK-LABEL: static_327680:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB6_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB6_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 327680-byte local (5 * 65536): the expected prologue above computes the
  ; final SP in x9 and loops, stepping SP down 65536 bytes and storing a probe
  ; each iteration until SP equals x9. There is no remainder step.
  %v = alloca i8, i64 327680, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 5*64k+1024, large enough to use a loop, but not a multiple of 64KiB
; so has a remainder, but no extra probe.
define void @static_328704(ptr %out) #0 {
; CHECK-LABEL: static_328704:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB7_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB7_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 328720
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 328704-byte local (5 * 65536 + 1024): the expected prologue above uses
  ; the probing loop for the first 327680 bytes, then drops SP by the
  ; remaining 1024 bytes without a further probing store.
  %v = alloca i8, i64 328704, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; 5*64k+1040, large enough to use a loop, has a remainder and
; an extra probe.
define void @static_328720(ptr %out) #0 {
; CHECK-LABEL: static_328720:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa w9, 327696
; CHECK-NEXT: .LBB8_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB8_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 328736
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 328720-byte local (5 * 65536 + 1040): the expected prologue above uses
  ; the probing loop for the first 327680 bytes, then drops SP by the
  ; remaining 1040 bytes and issues one more probing store.
  %v = alloca i8, i64 328720, align 1
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; A small allocation, but with a very large alignment requirement. We do this
; by moving SP far enough that a sufficiently-aligned block will exist
; somewhere in the stack frame, so must probe the whole of that larger SP move.
define void @static_16_align_131072(ptr %out) #0 {
; CHECK-LABEL: static_16_align_131072:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #31, lsl #12 // =126976
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xfffffffffffe0000
; CHECK-NEXT: .LBB9_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.le .LBB9_3
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB9_1
; CHECK-NEXT: .LBB9_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 16-byte local aligned to 131072 bytes. The expected prologue above sets
  ; up x29, computes the target in x9 as (sp - 131056) rounded down to a
  ; 131072-byte boundary, then loops: it steps SP down 65536 bytes at a time,
  ; storing a probe at each step that is still above x9. On exit it sets SP
  ; to x9 and touches [sp] with a load.
  %v = alloca i8, i64 16, align 131072
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; A small allocation, but with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
define void @static_16_align_8192(ptr %out) #0 {
; CHECK-LABEL: static_16_align_8192:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and sp, x9, #0xffffffffffffe000
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 16-byte local aligned to 8192 bytes. The expected prologue above sets up
  ; x29, sets SP to (sp - 8176) rounded down to an 8192-byte boundary in
  ; straight-line code, and issues a single probing store with no loop.
  %v = alloca i8, i64 16, align 8192
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; A large allocation with a very large alignment requirement which
; is nevertheless small enough as to not need a loop.
define void @static_32752_align_32k(ptr %out) #0 {
; CHECK-LABEL: static_32752_align_32k:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #7, lsl #12 // =28672
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and sp, x9, #0xffffffffffff8000
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  ; A 32752-byte local aligned to 32768 bytes. The expected prologue above
  ; sets up x29, sets SP to (sp - 32752) rounded down to a 32768-byte boundary
  ; in straight-line code, and issues a single probing store with no loop.
  %v = alloca i8, i64 32752, align 32768
  ; Write the buffer address to %out (presumably so the allocation is not
  ; discarded as dead). Uses the opaque `ptr` spelling, like the signature.
  store ptr %v, ptr %out, align 8
  ret void
}
; Attribute group applied to every function declared with #0 in this file:
; inline stack probing ("probe-stack"="inline-asm") with a 65536-byte (64KiB)
; probe size, no requested frame pointer, and uwtable(async), which presumably
; is what produces the epilogue CFI directives in the expected output
; (TODO confirm against the LangRef).
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="65536" "frame-pointer"="none" }