; blob: a1eb1ceeaf19b2cfa8fe874264800a294768bd49 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme,+sme2p1 -verify-machineinstrs < %s | FileCheck %s
; Target triple embedded in the module.
target triple = "aarch64-unknown-linux-gnu"
; External callees used by the tests below. The "_enabled" variants carry
; attribute group #0 ("aarch64_pstate_sm_enabled", defined at the end of this
; file); @bar and @bar_retv carry no attribute group.
; Callees taking a scalable-vector argument:
declare void @bar_enabled(<vscale x 4 x i32>) #0
declare void @bar(<vscale x 4 x i32>)
; Callees returning a scalable vector:
declare <vscale x 4 x i32> @bar_retv_enabled() #0
declare <vscale x 4 x i32> @bar_retv()
; Non-streaming -> calls streaming callee
;
; The caller carries no attribute group. It loads a <vscale x 4 x i32> from
; %arg and passes it to @bar_enabled, which is declared with #0
; ("aarch64_pstate_sm_enabled"). The autogenerated assertions expect:
;   * an rdvl/addsvl difference that must be zero, otherwise `brk #0x1`;
;   * z0 stored to a `mul vl`-addressed stack slot before `smstart sm` and
;     reloaded right after it, ahead of `bl bar_enabled`;
;   * `smstop sm` once the call has returned.
define void @foo_non_streaming_pass_arg(ptr %arg) {
; CHECK-LABEL: foo_non_streaming_pass_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w28, -8
; CHECK-NEXT: .cfi_offset vg, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: .cfi_offset b10, -56
; CHECK-NEXT: .cfi_offset b11, -64
; CHECK-NEXT: .cfi_offset b12, -72
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB0_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB0_2: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: sub x8, x29, #64
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl bar_enabled
; CHECK-NEXT: smstop sm
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: .cfi_def_cfa wsp, 96
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; Load the scalable-vector argument that will be passed to the callee.
%v = load <vscale x 4 x i32>, ptr %arg, align 16
; The call site repeats #0. Although marked `tail`, the expected output above
; uses `bl` followed by `smstop sm` and the epilogue.
tail call void @bar_enabled(<vscale x 4 x i32> %v) #0
ret void
}
; Streaming-compatible -> calls streaming callee
;
; The caller carries #1 ("aarch64_pstate_sm_compatible") and passes a
; <vscale x 4 x i32> loaded from %arg to @bar_enabled (#0). The autogenerated
; assertions expect:
;   * SVCR to be read into x19 (`mrs x19, SVCR`);
;   * `smstart sm` before the call and `smstop sm` after it, each skipped by
;     `tbnz w19, #0` when bit 0 of the saved SVCR value is set;
;   * z0 stored to a `mul vl`-addressed stack slot before the conditional
;     `smstart sm` and reloaded after it, ahead of `bl bar_enabled`.
define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 {
; CHECK-LABEL: foo_streaming_compatible_pass_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 1136
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill
; CHECK-NEXT: add x29, sp, #1088
; CHECK-NEXT: .cfi_def_cfa w29, 48
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w28, -24
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: .cfi_offset w30, -40
; CHECK-NEXT: .cfi_offset w29, -48
; CHECK-NEXT: .cfi_offset b8, -1080
; CHECK-NEXT: .cfi_offset b9, -1088
; CHECK-NEXT: .cfi_offset b10, -1096
; CHECK-NEXT: .cfi_offset b11, -1104
; CHECK-NEXT: .cfi_offset b12, -1112
; CHECK-NEXT: .cfi_offset b13, -1120
; CHECK-NEXT: .cfi_offset b14, -1128
; CHECK-NEXT: .cfi_offset b15, -1136
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB1_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB1_2: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: sub x8, x29, #1088
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: tbnz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB1_4: // %entry
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl bar_enabled
; CHECK-NEXT: tbnz w19, #0, .LBB1_6
; CHECK-NEXT: // %bb.5: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_6: // %entry
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa wsp, 1136
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; Load the scalable-vector argument that will be passed to the callee.
%v = load <vscale x 4 x i32>, ptr %arg, align 16
; The call site repeats #0, matching the declaration of @bar_enabled.
tail call void @bar_enabled(<vscale x 4 x i32> %v) #0
ret void
}
; Streaming -> calls non-streaming callee
;
; The caller carries #0 ("aarch64_pstate_sm_enabled") and passes a
; <vscale x 4 x i32> loaded from %arg to @bar, which has no attribute group.
; The autogenerated assertions expect `smstop sm` before the call and
; `smstart sm` after it. In the expected output the `ldr z0, [x0]` sits
; between `smstop sm` and `bl bar`, and no z0 spill/reload pair appears.
define void @foo_streaming_pass_arg(ptr %arg) #0 {
; CHECK-LABEL: foo_streaming_pass_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #1120
; CHECK-NEXT: .cfi_def_cfa_offset 1120
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset w28, -8
; CHECK-NEXT: .cfi_offset vg, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_offset b8, -1064
; CHECK-NEXT: .cfi_offset b9, -1072
; CHECK-NEXT: .cfi_offset b10, -1080
; CHECK-NEXT: .cfi_offset b11, -1088
; CHECK-NEXT: .cfi_offset b12, -1096
; CHECK-NEXT: .cfi_offset b13, -1104
; CHECK-NEXT: .cfi_offset b14, -1112
; CHECK-NEXT: .cfi_offset b15, -1120
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 2144
; CHECK-NEXT: smstop sm
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB2_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB2_2: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: bl bar
; CHECK-NEXT: smstart sm
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 1120
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1120
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; Load the scalable-vector argument that will be passed to the callee.
%v = load <vscale x 4 x i32>, ptr %arg, align 16
; No attribute group on this call site: @bar is declared without one.
tail call void @bar(<vscale x 4 x i32> %v)
ret void
}
; Non-streaming -> returns SVE value from streaming callee
;
; The caller carries no attribute group. It calls @bar_retv_enabled (#0,
; "aarch64_pstate_sm_enabled") and stores the returned <vscale x 4 x i32> to
; %ptr. The autogenerated assertions expect:
;   * %ptr to be copied from x0 to x19 before the call;
;   * `smstart sm` before `bl bar_retv_enabled`;
;   * the returned z0 stored to a `mul vl`-addressed stack slot before
;     `smstop sm` and reloaded after it, ahead of the final `str z0, [x19]`.
define void @foo_non_streaming_retval(ptr %ptr) {
; CHECK-LABEL: foo_non_streaming_retval:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp x28, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: .cfi_def_cfa w29, 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w28, -16
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: .cfi_offset w30, -40
; CHECK-NEXT: .cfi_offset w29, -48
; CHECK-NEXT: .cfi_offset b8, -56
; CHECK-NEXT: .cfi_offset b9, -64
; CHECK-NEXT: .cfi_offset b10, -72
; CHECK-NEXT: .cfi_offset b11, -80
; CHECK-NEXT: .cfi_offset b12, -88
; CHECK-NEXT: .cfi_offset b13, -96
; CHECK-NEXT: .cfi_offset b14, -104
; CHECK-NEXT: .cfi_offset b15, -112
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB3_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB3_2: // %entry
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart sm
; CHECK-NEXT: bl bar_retv_enabled
; CHECK-NEXT: sub x8, x29, #64
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: str z0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: .cfi_def_cfa wsp, 112
; CHECK-NEXT: ldp x28, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; The call site repeats #0, matching the declaration of @bar_retv_enabled.
%v = tail call <vscale x 4 x i32> @bar_retv_enabled() #0
; Using the result after the call is what makes the returned vector live
; across the switch back out of streaming mode.
store <vscale x 4 x i32> %v, ptr %ptr, align 16
ret void
}
; Streaming-compatible -> returns SVE value from streaming callee
;
; The caller carries #1 ("aarch64_pstate_sm_compatible"). It calls
; @bar_retv_enabled (#0) and stores the returned <vscale x 4 x i32> to %ptr.
; The autogenerated assertions expect:
;   * SVCR to be read into x20 (`mrs x20, SVCR`) and %ptr copied to x19;
;   * `smstart sm` before the call and `smstop sm` after it, each skipped by
;     `tbnz w20, #0` when bit 0 of the saved SVCR value is set;
;   * the returned z0 stored to a `mul vl`-addressed stack slot before the
;     conditional `smstop sm` and reloaded after it, ahead of `str z0, [x19]`.
define void @foo_streaming_compatible_retval(ptr %ptr) #1 {
; CHECK-LABEL: foo_streaming_compatible_retval:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 1136
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
; CHECK-NEXT: add x29, sp, #1088
; CHECK-NEXT: .cfi_def_cfa w29, 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w28, -24
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: .cfi_offset w30, -40
; CHECK-NEXT: .cfi_offset w29, -48
; CHECK-NEXT: .cfi_offset b8, -1080
; CHECK-NEXT: .cfi_offset b9, -1088
; CHECK-NEXT: .cfi_offset b10, -1096
; CHECK-NEXT: .cfi_offset b11, -1104
; CHECK-NEXT: .cfi_offset b12, -1112
; CHECK-NEXT: .cfi_offset b13, -1120
; CHECK-NEXT: .cfi_offset b14, -1128
; CHECK-NEXT: .cfi_offset b15, -1136
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB4_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB4_2: // %entry
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: tbnz w20, #0, .LBB4_4
; CHECK-NEXT: // %bb.3: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4: // %entry
; CHECK-NEXT: bl bar_retv_enabled
; CHECK-NEXT: sub x8, x29, #1088
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: tbnz w20, #0, .LBB4_6
; CHECK-NEXT: // %bb.5: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_6: // %entry
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: str z0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa wsp, 1136
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x20, [sp, #1120] // 8-byte Folded Reload
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w20
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; The call site repeats #0, matching the declaration of @bar_retv_enabled.
%v = tail call <vscale x 4 x i32> @bar_retv_enabled() #0
; Using the result after the call is what makes the returned vector live
; across the conditional switch out of streaming mode.
store <vscale x 4 x i32> %v, ptr %ptr, align 16
ret void
}
; Streaming -> returns SVE value from non-streaming callee
;
; The caller carries #0 ("aarch64_pstate_sm_enabled"). It calls @bar_retv,
; which has no attribute group, and stores the returned <vscale x 4 x i32> to
; %ptr. The autogenerated assertions expect:
;   * `smstop sm` before `bl bar_retv`, with %ptr copied from x0 to x19;
;   * the returned z0 stored to a `mul vl`-addressed stack slot before
;     `smstart sm` and reloaded after it, ahead of the final `str z0, [x19]`.
define void @foo_streaming_retval(ptr %ptr) #0 {
; CHECK-LABEL: foo_streaming_retval:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 1136
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill
; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill
; CHECK-NEXT: add x29, sp, #1088
; CHECK-NEXT: .cfi_def_cfa w29, 48
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w28, -24
; CHECK-NEXT: .cfi_offset vg, -32
; CHECK-NEXT: .cfi_offset w30, -40
; CHECK-NEXT: .cfi_offset w29, -48
; CHECK-NEXT: .cfi_offset b8, -1080
; CHECK-NEXT: .cfi_offset b9, -1088
; CHECK-NEXT: .cfi_offset b10, -1096
; CHECK-NEXT: .cfi_offset b11, -1104
; CHECK-NEXT: .cfi_offset b12, -1112
; CHECK-NEXT: .cfi_offset b13, -1120
; CHECK-NEXT: .cfi_offset b14, -1128
; CHECK-NEXT: .cfi_offset b15, -1136
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: smstop sm
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: addsvl x8, x8, #-1
; CHECK-NEXT: cbz x8, .LBB5_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: brk #0x1
; CHECK-NEXT: .LBB5_2: // %entry
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: bl bar_retv
; CHECK-NEXT: sub x8, x29, #1088
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: str z0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa wsp, 1136
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1136
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
; No attribute group on this call site: @bar_retv is declared without one.
%v = tail call <vscale x 4 x i32> @bar_retv()
; Using the result after the call is what makes the returned vector live
; across the switch back into streaming mode.
store <vscale x 4 x i32> %v, ptr %ptr, align 16
ret void
}
; Attribute groups referenced by the declarations, definitions and call sites
; above.
; #0: used on the streaming callers/callees (@bar_enabled, @bar_retv_enabled,
;     @foo_streaming_pass_arg, @foo_streaming_retval) and on calls to the
;     "_enabled" callees.
attributes #0 = { "aarch64_pstate_sm_enabled" }
; #1: used on the streaming-compatible callers
;     (@foo_streaming_compatible_pass_arg, @foo_streaming_compatible_retval).
attributes #1 = { "aarch64_pstate_sm_compatible" }