; blob: d2715b58439d8b24390cdd879be2e89cca0b9601 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -aarch64-new-sme-abi=false < %s | FileCheck %s --check-prefix=CHECK-SDAG
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
; External callee used by @private_za below. It carries the "aarch64_inout_za"
; attribute; judging by its name it presumably shares ZA state with its caller
; (confirm against the LLVM AArch64 SME documentation).
declare void @shared_za_callee() "aarch64_inout_za"
; An "aarch64_new_za" function whose body only calls an "aarch64_inout_za"
; callee. Both llc configurations are expected to read TPIDR2_EL0, call
; __arm_tpidr2_save and clear TPIDR2_EL0 when it is non-zero, and then bracket
; the call with "smstart za" / "smstop za". The expected output differs in the
; details: with -aarch64-new-sme-abi=false (the CHECK-SDAG prefix) there is an
; extra "rdsvl x8, #1" and "zero {za}" follows "smstart za"; in the default
; configuration "zero {za}" sits inside the conditional save block instead.
define void @private_za() "aarch64_new_za" {
; CHECK-SDAG-LABEL: private_za:
; CHECK-SDAG: // %bb.0: // %prelude
; CHECK-SDAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT: .cfi_offset w30, -16
; CHECK-SDAG-NEXT: rdsvl x8, #1
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
; CHECK-SDAG-NEXT: cbz x8, .LBB0_2
; CHECK-SDAG-NEXT: b .LBB0_1
; CHECK-SDAG-NEXT: .LBB0_1: // %save.za
; CHECK-SDAG-NEXT: bl __arm_tpidr2_save
; CHECK-SDAG-NEXT: mov x8, xzr
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8
; CHECK-SDAG-NEXT: b .LBB0_2
; CHECK-SDAG-NEXT: .LBB0_2:
; CHECK-SDAG-NEXT: smstart za
; CHECK-SDAG-NEXT: zero {za}
; CHECK-SDAG-NEXT: bl shared_za_callee
; CHECK-SDAG-NEXT: smstop za
; CHECK-SDAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SDAG-NEXT: ret
;
; CHECK-LABEL: private_za:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: zero {za}
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: bl shared_za_callee
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
; The whole IR body: one call into the inout-ZA callee, then return.
call void @shared_za_callee()
ret void
}
; An "aarch64_new_za" function with two separate return blocks and no calls.
; Each return block is expected to carry its own "smstop za" (and, where a
; frame was set up, its own epilogue) in both llc configurations.
; Note: This test must run at -O0 as otherwise the multiple exits are optimized out.
; TODO: We should be able to omit the ZA save here (as this function does not use ZA).
; For now both configurations still expect the conditional __arm_tpidr2_save
; sequence followed by "smstart za".
define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" {
; CHECK-SDAG-LABEL: private_za_multiple_exit:
; CHECK-SDAG: // %bb.0: // %prelude
; CHECK-SDAG-NEXT: sub sp, sp, #32
; CHECK-SDAG-NEXT: str x30, [sp, #16] // 8-byte Spill
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 32
; CHECK-SDAG-NEXT: .cfi_offset w30, -16
; CHECK-SDAG-NEXT: str x2, [sp] // 8-byte Spill
; CHECK-SDAG-NEXT: str w1, [sp, #8] // 4-byte Spill
; CHECK-SDAG-NEXT: str w0, [sp, #12] // 4-byte Spill
; CHECK-SDAG-NEXT: rdsvl x8, #1
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
; CHECK-SDAG-NEXT: cbz x8, .LBB1_2
; CHECK-SDAG-NEXT: b .LBB1_1
; CHECK-SDAG-NEXT: .LBB1_1: // %save.za
; CHECK-SDAG-NEXT: bl __arm_tpidr2_save
; CHECK-SDAG-NEXT: mov x8, xzr
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8
; CHECK-SDAG-NEXT: b .LBB1_2
; CHECK-SDAG-NEXT: .LBB1_2: // %entry
; CHECK-SDAG-NEXT: ldr x8, [sp] // 8-byte Reload
; CHECK-SDAG-NEXT: smstart za
; CHECK-SDAG-NEXT: zero {za}
; CHECK-SDAG-NEXT: subs x8, x8, #1
; CHECK-SDAG-NEXT: b.ne .LBB1_4
; CHECK-SDAG-NEXT: b .LBB1_3
; CHECK-SDAG-NEXT: .LBB1_3: // %if.else
; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-SDAG-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-SDAG-NEXT: add w0, w8, w9
; CHECK-SDAG-NEXT: smstop za
; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-SDAG-NEXT: add sp, sp, #32
; CHECK-SDAG-NEXT: ret
; CHECK-SDAG-NEXT: .LBB1_4: // %if.end
; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-SDAG-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-SDAG-NEXT: subs w0, w8, w9
; CHECK-SDAG-NEXT: smstop za
; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-SDAG-NEXT: add sp, sp, #32
; CHECK-SDAG-NEXT: ret
;
; CHECK-LABEL: private_za_multiple_exit:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbnz x8, .LBB1_1
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: // %entry
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: zero {za}
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_2: // %entry
; CHECK-NEXT: smstart za
; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill
; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill
; CHECK-NEXT: subs x8, x2, #1
; CHECK-NEXT: b.ne .LBB1_4
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_3: // %if.else
; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: smstop za
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_4: // %if.end
; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload
; CHECK-NEXT: subs w0, w8, w9
; CHECK-NEXT: smstop za
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
; Branch on %cond == 1 so the function has two distinct returning blocks.
entry:
%tobool = icmp eq i64 %cond, 1
br i1 %tobool, label %if.else, label %if.end
; First exit: taken when %cond == 1, returns %a + %b.
if.else:
%add = add i32 %a, %b
ret i32 %add
; Second exit: taken otherwise, returns %a - %b.
if.end:
%sub = sub i32 %a, %b
ret i32 %sub
}
; In simple cases like this we should omit all ZA setup.
; The default configuration expects exactly that: only the add and the ret.
; With -aarch64-new-sme-abi=false (the CHECK-SDAG prefix) the expected output
; still contains the TPIDR2_EL0 test, the conditional __arm_tpidr2_save call,
; and the "smstart za" / "zero {za}" / "smstop za" sequence.
define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" {
; CHECK-SDAG-LABEL: private_za_trivially_does_not_use_za:
; CHECK-SDAG: // %bb.0: // %prelude
; CHECK-SDAG-NEXT: sub sp, sp, #32
; CHECK-SDAG-NEXT: str x30, [sp, #16] // 8-byte Spill
; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 32
; CHECK-SDAG-NEXT: .cfi_offset w30, -16
; CHECK-SDAG-NEXT: str w0, [sp, #12] // 4-byte Spill
; CHECK-SDAG-NEXT: rdsvl x8, #1
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
; CHECK-SDAG-NEXT: cbz x8, .LBB2_2
; CHECK-SDAG-NEXT: b .LBB2_1
; CHECK-SDAG-NEXT: .LBB2_1: // %save.za
; CHECK-SDAG-NEXT: bl __arm_tpidr2_save
; CHECK-SDAG-NEXT: mov x8, xzr
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8
; CHECK-SDAG-NEXT: b .LBB2_2
; CHECK-SDAG-NEXT: .LBB2_2:
; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload
; CHECK-SDAG-NEXT: smstart za
; CHECK-SDAG-NEXT: zero {za}
; CHECK-SDAG-NEXT: add w0, w8, w8
; CHECK-SDAG-NEXT: smstop za
; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-SDAG-NEXT: add sp, sp, #32
; CHECK-SDAG-NEXT: ret
;
; CHECK-LABEL: private_za_trivially_does_not_use_za:
; CHECK: // %bb.0:
; CHECK-NEXT: add w0, w0, w0
; CHECK-NEXT: ret
; Pure integer arithmetic: returns %x + %x, with no calls and no ZA intrinsics.
%ret = add i32 %x, %x
ret i32 %ret
}