| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s |
| |
| ; External callee carrying the "aarch64_inout_za" attribute; called from @private_za below. |
| declare void @shared_za_callee() "aarch64_inout_za" |
| |
| ; An "aarch64_new_za" function whose body is a single call to an |
| ; "aarch64_inout_za" callee. The expected code reads TPIDR2_EL0 and, only when |
| ; it is non-zero, calls __arm_tpidr2_save, clears TPIDR2_EL0 and zeroes ZA. |
| ; Both paths then join: ZA is enabled (smstart za) before the call and |
| ; disabled (smstop za) before the return. |
| define void @private_za() "aarch64_new_za" { |
| ; CHECK-LABEL: private_za: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: .cfi_offset w30, -16 |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: cbnz x8, .LBB0_1 |
| ; CHECK-NEXT: b .LBB0_2 |
| ; CHECK-NEXT: .LBB0_1: |
| ; CHECK-NEXT: bl __arm_tpidr2_save |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: zero {za} |
| ; CHECK-NEXT: b .LBB0_2 |
| ; CHECK-NEXT: .LBB0_2: |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: bl shared_za_callee |
| ; CHECK-NEXT: smstop za |
| ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; The call below is the only instruction in the body besides the return. |
| call void @shared_za_callee() |
| ret void |
| } |
| |
| ; Note: This test must run at -O0 as otherwise the multiple exits are optimized out. |
| ; An "aarch64_new_za" function with two separate return blocks and no calls. |
| ; The expected code for both exits contains only the spill/reload, the |
| ; arithmetic and the stack adjustment: no TPIDR2_EL0 access, smstart or smstop. |
| define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_new_za" { |
| ; CHECK-LABEL: private_za_multiple_exit: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: str w1, [sp, #8] // 4-byte Spill |
| ; CHECK-NEXT: str w0, [sp, #12] // 4-byte Spill |
| ; CHECK-NEXT: subs x8, x2, #1 |
| ; CHECK-NEXT: b.ne .LBB1_2 |
| ; CHECK-NEXT: b .LBB1_1 |
| ; CHECK-NEXT: .LBB1_1: // %if.else |
| ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-NEXT: add w0, w8, w9 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .LBB1_2: // %if.end |
| ; CHECK-NEXT: ldr w8, [sp, #12] // 4-byte Reload |
| ; CHECK-NEXT: ldr w9, [sp, #8] // 4-byte Reload |
| ; CHECK-NEXT: subs w0, w8, w9 |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| ; %cond == 1 branches to %if.else; every other value branches to %if.end. |
| entry: |
| %tobool = icmp eq i64 %cond, 1 |
| br i1 %tobool, label %if.else, label %if.end |
| |
| ; First exit: returns %a + %b. |
| if.else: |
| %add = add i32 %a, %b |
| ret i32 %add |
| |
| ; Second exit: returns %a - %b. |
| if.end: |
| %sub = sub i32 %a, %b |
| ret i32 %sub |
| } |
| |
| ; In simple cases like this we should omit all ZA setup. |
| ; The body only adds %x to itself and returns the sum; the expected output is |
| ; exactly that add followed by ret, with no ZA-related instructions. |
| define i32 @private_za_trivially_does_not_use_za(i32 %x) "aarch64_new_za" { |
| ; CHECK-LABEL: private_za_trivially_does_not_use_za: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: add w0, w0, w0 |
| ; CHECK-NEXT: ret |
| %ret = add i32 %x, %x |
| ret i32 %ret |
| } |