| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -aarch64-new-sme-abi=false < %s | FileCheck %s --check-prefix=CHECK-SDAG |
| ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s |
| |
| ; External callee declared with the "aarch64_inout_za" attribute; it is called |
| ; from @private_za below. |
| declare void @shared_za_callee() "aarch64_inout_za" |
| |
| ; An "aarch64_new_za" function whose body only calls @shared_za_callee. |
| ; Both llc invocations are expected to read TPIDR2_EL0, call |
| ; __arm_tpidr2_save and clear TPIDR2_EL0 when it is non-zero, and bracket the |
| ; call with "smstart za" / "smstop za". The expectations differ in where |
| ; "zero {za}" appears: right after "smstart za" under the CHECK-SDAG prefix, |
| ; and inside the conditional save block under the CHECK prefix. |
| define void @private_za() "aarch64_new_za" { |
| ; CHECK-SDAG-LABEL: private_za: |
| ; CHECK-SDAG: // %bb.0: // %prelude |
| ; CHECK-SDAG-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SDAG-NEXT: .cfi_offset w30, -16 |
| ; CHECK-SDAG-NEXT: rdsvl x8, #1 |
| ; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-SDAG-NEXT: cbz x8, .LBB0_2 |
| ; CHECK-SDAG-NEXT: b .LBB0_1 |
| ; CHECK-SDAG-NEXT: .LBB0_1: // %save.za |
| ; CHECK-SDAG-NEXT: bl __arm_tpidr2_save |
| ; CHECK-SDAG-NEXT: mov x8, xzr |
| ; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-SDAG-NEXT: b .LBB0_2 |
| ; CHECK-SDAG-NEXT: .LBB0_2: |
| ; CHECK-SDAG-NEXT: smstart za |
| ; CHECK-SDAG-NEXT: zero {za} |
| ; CHECK-SDAG-NEXT: bl shared_za_callee |
| ; CHECK-SDAG-NEXT: smstop za |
| ; CHECK-SDAG-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-SDAG-NEXT: ret |
| ; |
| ; CHECK-LABEL: private_za: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w30, -16 |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: cbnz x8, .LBB0_1 |
| ; CHECK-NEXT: b .LBB0_2 |
| ; CHECK-NEXT: .LBB0_1: |
| ; CHECK-NEXT: bl __arm_tpidr2_save |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: zero {za} |
| ; CHECK-NEXT: b .LBB0_2 |
| ; CHECK-NEXT: .LBB0_2: |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: bl shared_za_callee |
| ; CHECK-NEXT: smstop za |
| ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; The only instruction besides the return: a call to the "aarch64_inout_za" callee. |
| call void @shared_za_callee() |
| ret void |
| } |
| |
| ; Note: This test must run at -O0 as otherwise the multiple exits are optimized out. |
| ; TODO: We should be able to omit the ZA save here (as this function does not use ZA). |
| ; |
| ; An "aarch64_new_za" function with two return blocks and no calls. Both sets |
| ; of expectations below require "smstop za" ahead of the "ret" in each of the |
| ; two exit blocks (%if.else and %if.end). |
| define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" { |
| ; CHECK-SDAG-LABEL: private_za_multiple_exit: |
| ; CHECK-SDAG: // %bb.0: // %prelude |
| ; CHECK-SDAG-NEXT: sub sp, sp, #32 |
| ; CHECK-SDAG-NEXT: str x30, [sp, #16] // 8-byte Spill |
| ; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-SDAG-NEXT: .cfi_offset w30, -16 |
| ; CHECK-SDAG-NEXT: str x2, [sp] // 8-byte Spill |
| ; CHECK-SDAG-NEXT: str w1, [sp, #8] // 4-byte Spill |
| ; CHECK-SDAG-NEXT: str w0, [sp, #12] // 4-byte Spill |
| ; CHECK-SDAG-NEXT: rdsvl x8, #1 |
| ; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-SDAG-NEXT: cbz x8, .LBB1_2 |
| ; CHECK-SDAG-NEXT: b .LBB1_1 |
| ; CHECK-SDAG-NEXT: .LBB1_1: // %save.za |
| ; CHECK-SDAG-NEXT: bl __arm_tpidr2_save |
| ; CHECK-SDAG-NEXT: mov x8, xzr |
| ; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-SDAG-NEXT: b .LBB1_2 |
| ; CHECK-SDAG-NEXT: .LBB1_2: // %entry |
| ; CHECK-SDAG-NEXT: ldr x8, [sp] // 8-byte Reload |
| ; CHECK-SDAG-NEXT: smstart za |
| ; CHECK-SDAG-NEXT: zero {za} |
| ; CHECK-SDAG-NEXT: subs x8, x8, #1 |
| ; CHECK-SDAG-NEXT: b.ne .LBB1_4 |
| ; CHECK-SDAG-NEXT: b .LBB1_3 |
| ; CHECK-SDAG-NEXT: .LBB1_3: // %if.else |
| ; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-SDAG-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-SDAG-NEXT: add w0, w8, w9 |
| ; CHECK-SDAG-NEXT: smstop za |
| ; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload |
| ; CHECK-SDAG-NEXT: add sp, sp, #32 |
| ; CHECK-SDAG-NEXT: ret |
| ; CHECK-SDAG-NEXT: .LBB1_4: // %if.end |
| ; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-SDAG-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-SDAG-NEXT: subs w0, w8, w9 |
| ; CHECK-SDAG-NEXT: smstop za |
| ; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload |
| ; CHECK-SDAG-NEXT: add sp, sp, #32 |
| ; CHECK-SDAG-NEXT: ret |
| ; |
| ; CHECK-LABEL: private_za_multiple_exit: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: cbnz x8, .LBB1_1 |
| ; CHECK-NEXT: b .LBB1_2 |
| ; CHECK-NEXT: .LBB1_1: // %entry |
| ; CHECK-NEXT: bl __arm_tpidr2_save |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: zero {za} |
| ; CHECK-NEXT: b .LBB1_2 |
| ; CHECK-NEXT: .LBB1_2: // %entry |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill |
| ; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill |
| ; CHECK-NEXT: subs x8, x2, #1 |
| ; CHECK-NEXT: b.ne .LBB1_4 |
| ; CHECK-NEXT: b .LBB1_3 |
| ; CHECK-NEXT: .LBB1_3: // %if.else |
| ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-NEXT: add w0, w8, w9 |
| ; CHECK-NEXT: smstop za |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_4: // %if.end |
| ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-NEXT: subs w0, w8, w9 |
| ; CHECK-NEXT: smstop za |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; Branch to %if.else when %cond equals 1, otherwise to %if.end. |
| entry: |
| %tobool = icmp eq i64 %cond, 1 |
| br i1 %tobool, label %if.else, label %if.end |
| |
| ; First exit: returns %a + %b. |
| if.else: |
| %add = add i32 %a, %b |
| ret i32 %add |
| |
| ; Second exit: returns %a - %b. |
| if.end: |
| %sub = sub i32 %a, %b |
| ret i32 %sub |
| } |
| |
| ; In simple cases like this we should omit all ZA setup. |
| ; The CHECK expectations (llc invocation without -aarch64-new-sme-abi=false) |
| ; contain only the add and the ret. The CHECK-SDAG expectations |
| ; (-aarch64-new-sme-abi=false) still contain the TPIDR2_EL0 test, the |
| ; conditional __arm_tpidr2_save call, and the smstart/zero/smstop sequence. |
| define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" { |
| ; CHECK-SDAG-LABEL: private_za_trivially_does_not_use_za: |
| ; CHECK-SDAG: // %bb.0: // %prelude |
| ; CHECK-SDAG-NEXT: sub sp, sp, #32 |
| ; CHECK-SDAG-NEXT: str x30, [sp, #16] // 8-byte Spill |
| ; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-SDAG-NEXT: .cfi_offset w30, -16 |
| ; CHECK-SDAG-NEXT: str w0, [sp, #12] // 4-byte Spill |
| ; CHECK-SDAG-NEXT: rdsvl x8, #1 |
| ; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-SDAG-NEXT: cbz x8, .LBB2_2 |
| ; CHECK-SDAG-NEXT: b .LBB2_1 |
| ; CHECK-SDAG-NEXT: .LBB2_1: // %save.za |
| ; CHECK-SDAG-NEXT: bl __arm_tpidr2_save |
| ; CHECK-SDAG-NEXT: mov x8, xzr |
| ; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-SDAG-NEXT: b .LBB2_2 |
| ; CHECK-SDAG-NEXT: .LBB2_2: |
| ; CHECK-SDAG-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-SDAG-NEXT: smstart za |
| ; CHECK-SDAG-NEXT: zero {za} |
| ; CHECK-SDAG-NEXT: add w0, w8, w8 |
| ; CHECK-SDAG-NEXT: smstop za |
| ; CHECK-SDAG-NEXT: ldr x30, [sp, #16] // 8-byte Reload |
| ; CHECK-SDAG-NEXT: add sp, sp, #32 |
| ; CHECK-SDAG-NEXT: ret |
| ; |
| ; CHECK-LABEL: private_za_trivially_does_not_use_za: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: add w0, w0, w0 |
| ; CHECK-NEXT: ret |
| ; Returns %x + %x; the body contains no calls. |
| %ret = add i32 %x, %x |
| ret i32 %ret |
| } |