| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK |
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING |
| |
| declare void @private_za_call() |
| declare void @shared_za_call() "aarch64_inout_za" |
| |
; A private-ZA call inside a counted loop, from a shared-ZA
; ("aarch64_inout_za") caller. The old lowering sets up the TPIDR2_EL0
; lazy save and clears it again on every iteration; the new lowering
; (-aarch64-new-sme-abi) sets TPIDR2_EL0 once before the loop and emits a
; single conditional __arm_tpidr2_restore after the loop exits.
define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_2: // %loop
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: subs w19, w19, #1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b.eq .LBB0_5
; CHECK-NEXT: .LBB0_3: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.4: // %loop
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_5: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: private_za_loop:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: cmp w0, #1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: b.lt .LBB0_3
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: .LBB0_2: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: subs w19, w19, #1
; CHECK-NEWLOWERING-NEXT: b.ne .LBB0_2
; CHECK-NEWLOWERING-NEXT: .LBB0_3: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB0_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB0_5: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
  %cmpgt = icmp sgt i32 %n, 0
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @private_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  ret void
}
| |
| ; FIXME: In the new lowering we could weight edges to avoid doing the lazy save in the loop. |
; Like @private_za_loop, but ZA is also used (shared_za_call) both before
; the loop and at the exit, so ZA must be active again on loop exit. With
; the new lowering the lazy-save setup/restore still happens inside the
; loop (see FIXME above about edge weights).
define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop_active_entry_and_exit:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: cmp w19, #1
; CHECK-NEXT: b.lt .LBB1_5
; CHECK-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_2: // %loop
; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT: subs w19, w19, #1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b.eq .LBB1_5
; CHECK-NEXT: .LBB1_3: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.4: // %loop
; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_5: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: private_za_loop_active_entry_and_exit:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: cmp w19, #1
; CHECK-NEWLOWERING-NEXT: b.lt .LBB1_5
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT: b .LBB1_3
; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: cbz w19, .LBB1_5
; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: sub w19, w19, #1
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_2
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: b .LBB1_2
; CHECK-NEWLOWERING-NEXT: .LBB1_5: // %exit
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
entry:
  %cmpgt = icmp sgt i32 %n, 0
  tail call void @shared_za_call()
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @private_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  tail call void @shared_za_call()
  ret void
}
| |
; A loop containing only shared-ZA calls: neither lowering should emit
; any lazy-save (TPIDR2_EL0) setup, so both share one CHECK-COMMON block.
define void @shared_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: shared_za_loop:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: cmp w0, #1
; CHECK-COMMON-NEXT: b.lt .LBB2_4
; CHECK-COMMON-NEXT: // %bb.1: // %loop.preheader
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov w19, w0
; CHECK-COMMON-NEXT: .LBB2_2: // %loop
; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: subs w19, w19, #1
; CHECK-COMMON-NEXT: b.ne .LBB2_2
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: .LBB2_4: // %exit
; CHECK-COMMON-NEXT: ret
entry:
  %cmpgt = icmp sgt i32 %n, 0
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @shared_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  ret void
}
| |
| ; FIXME: The codegen for this case could be improved (by tuning weights). |
| ; Here the ZA save has been hoisted out of the conditional, but would be better |
| ; to sink it. |
; A private-ZA call on only one side of a branch. The old lowering keeps
; the lazy-save setup inside the conditional block; the new lowering
; currently hoists the "msr TPIDR2_EL0" above the branch (see FIXME
; above) and does the conditional restore once in %exit.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_private_za_call:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB3_4
; CHECK-NEXT: // %bb.1: // %private_za_call
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB3_3
; CHECK-NEXT: // %bb.2: // %private_za_call
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB3_3: // %private_za_call
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB3_4: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
  br i1 %cond, label %private_za_call, label %exit

private_za_call:
  tail call void @private_za_call()
  br label %exit

exit:
  tail call void @shared_za_call()
  ret void
}
| |
; A loop whose body has both a shared-ZA call and a private-ZA call each
; iteration (loop condition loaded from memory). The lazy save must be
; enabled before private_za_call and ZA restored (TPIDR2_EL0 cleared)
; before the next shared_za_call, on both lowerings.
define void @mixed_shared_private_za_loop(ptr %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: mixed_shared_private_za_loop:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: b .LBB4_2
; CHECK-NEXT: .LBB4_1: // %loop
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: ldrb w8, [x19]
; CHECK-NEXT: tbz w8, #0, .LBB4_4
; CHECK-NEXT: .LBB4_2: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB4_1
; CHECK-NEXT: // %bb.3: // %loop
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB4_1
; CHECK-NEXT: .LBB4_4: // %exit
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: mixed_shared_private_za_loop:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov x19, x0
; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: b .LBB4_2
; CHECK-NEWLOWERING-NEXT: .LBB4_1: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: tbz w8, #0, .LBB4_4
; CHECK-NEWLOWERING-NEXT: .LBB4_2: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: ldrb w8, [x19]
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB4_1
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: b .LBB4_1
; CHECK-NEWLOWERING-NEXT: .LBB4_4: // %exit
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
  br label %loop

loop:
  call void @shared_za_call()
  call void @private_za_call()
  br label %latch

latch:
  %bool = load volatile i8, ptr %cond, align 1
  %trunc = trunc i8 %bool to i1
  br i1 %trunc, label %loop, label %exit

exit:
  call void @shared_za_call()
  ret void
}
| |
| |
; A conditional private-ZA call followed by an unconditional one. The old
; lowering saves/restores around each call separately; the new lowering
; sets TPIDR2_EL0 once before the branch and performs a single
; conditional restore after the second private_za_call.
define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_clobber_followed_by_clobber:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.1: // %cond_clobber
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_3
; CHECK-NEXT: // %bb.2: // %cond_clobber
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_3: // %cond_clobber
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB5_4: // %exit
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_6
; CHECK-NEXT: // %bb.5: // %exit
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_6: // %exit
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_clobber_followed_by_clobber:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %cond_clobber
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB5_2: // %exit
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB5_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB5_4: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
  tail call void @shared_za_call()
  br i1 %cond, label %cond_clobber, label %exit

cond_clobber:
  tail call void @private_za_call()
  br label %exit

exit:
  tail call void @private_za_call()
  tail call void @shared_za_call()
  ret void
}
| |
; ZA-using calls only occur on one path. Both lowerings produce the same
; code (one CHECK-COMMON block): the lazy-save setup and restore stay
; confined to the %use_za block.
define void @conditionally_use_za(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: conditionally_use_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: tbz w0, #0, .LBB6_4
; CHECK-COMMON-NEXT: // %bb.1: // %use_za
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_call
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB6_3
; CHECK-COMMON-NEXT: // %bb.2: // %use_za
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB6_3: // %use_za
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: .LBB6_4: // %exit
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  br i1 %cond, label %use_za, label %exit

use_za:
  tail call void @shared_za_call()
  tail call void @private_za_call()
  br label %exit

exit:
  ret void
}
| |
| |
; Diamond CFG: shared-ZA call on one arm, private-ZA call on the other,
; merging into another shared-ZA call. ZA must be active again at the
; merge, so the restore happens in the %else arm; both lowerings agree
; (one CHECK-COMMON block).
define void @diamond_mixed_za_merge_shared(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: diamond_mixed_za_merge_shared:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: tbz w0, #0, .LBB7_2
; CHECK-COMMON-NEXT: // %bb.1: // %then
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: b .LBB7_5
; CHECK-COMMON-NEXT: .LBB7_2: // %else
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_call
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB7_4
; CHECK-COMMON-NEXT: // %bb.3: // %else
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB7_4: // %else
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: .LBB7_5: // %merge_shared
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
  br i1 %cond, label %then, label %else

then:
  call void @shared_za_call()
  br label %merge_shared

else:
  call void @private_za_call()
  br label %merge_shared

merge_shared:
  call void @shared_za_call()
  ret void
}
| |
| |
; Diamond CFG merging into a private-ZA call. The old lowering restores
; ZA at the end of %else and then saves again for the merged call; the
; new lowering keeps the lazy save active across the merge and restores
; only once, after the final private_za_call.
define void @diamond_mixed_za_merge_private(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: diamond_mixed_za_merge_private:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB8_2
; CHECK-NEXT: // %bb.1: // %then
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: b .LBB8_5
; CHECK-NEXT: .LBB8_2: // %else
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB8_4
; CHECK-NEXT: // %bb.3: // %else
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB8_4: // %else
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB8_5: // %merge_private_za
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB8_7
; CHECK-NEXT: // %bb.6: // %merge_private_za
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB8_7: // %merge_private_za
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: diamond_mixed_za_merge_private:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB8_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %then
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: b .LBB8_3
; CHECK-NEWLOWERING-NEXT: .LBB8_2: // %else
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB8_3: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB8_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB8_5: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
  br i1 %cond, label %then, label %else

then:
  call void @shared_za_call()
  br label %merge_private_za

else:
  call void @private_za_call()
  br label %merge_private_za

merge_private_za:
  call void @private_za_call()
  ret void
}
| |
; Mixed shared/private-ZA paths joined through a merge block with a
; critical edge (%merge has two predecessors and two successors). Each
; private-ZA call gets its own lazy-save setup and conditional restore,
; so ZA is active again before any shared-ZA call or return; both
; lowerings currently emit the same sequence.
define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: critical_edge_mixed_za:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB9_5
; CHECK-NEXT: // %bb.1: // %shared_path
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: tbz w19, #0, .LBB9_8
; CHECK-NEXT: .LBB9_2: // %exit_private
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB9_4
; CHECK-NEXT: // %bb.3: // %exit_private
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB9_4: // %exit_private
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b .LBB9_9
; CHECK-NEXT: .LBB9_5: // %private_path
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB9_7
; CHECK-NEXT: // %bb.6: // %private_path
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB9_7: // %private_path
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: tbnz w19, #0, .LBB9_2
; CHECK-NEXT: .LBB9_8: // %exit_shared
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: .LBB9_9: // %common.ret
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: critical_edge_mixed_za:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB9_5
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %shared_path
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB9_8
; CHECK-NEWLOWERING-NEXT: .LBB9_2: // %exit_private
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit_private
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_4: // %exit_private
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: b .LBB9_9
; CHECK-NEWLOWERING-NEXT: .LBB9_5: // %private_path
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_7
; CHECK-NEWLOWERING-NEXT: // %bb.6: // %private_path
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_7: // %private_path
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB9_2
; CHECK-NEWLOWERING-NEXT: .LBB9_8: // %exit_shared
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB9_9: // %common.ret
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
  br i1 %c1, label %shared_path, label %private_path

shared_path:
  call void @shared_za_call()
  br label %merge

private_path:
  call void @private_za_call()
  br label %merge

merge:
  br i1 %c2, label %exit_private, label %exit_shared

exit_private:
  call void @private_za_call()
  ret void

exit_shared:
  call void @shared_za_call()
  ret void
}
| |
| define void @nested_cond_in_loop(i32 %n, i1 %cond) "aarch64_inout_za" nounwind { |
| ; CHECK-COMMON-LABEL: nested_cond_in_loop: |
| ; CHECK-COMMON: // %bb.0: // %entry |
| ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: mov x29, sp |
| ; CHECK-COMMON-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: sub sp, sp, #16 |
| ; CHECK-COMMON-NEXT: rdsvl x8, #1 |
| ; CHECK-COMMON-NEXT: mov x9, sp |
| ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-COMMON-NEXT: mov sp, x9 |
| ; CHECK-COMMON-NEXT: cmp w0, #1 |
| ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-COMMON-NEXT: b.lt .LBB10_8 |
| ; CHECK-COMMON-NEXT: // %bb.1: // %loop.preheader |
| ; CHECK-COMMON-NEXT: mov w19, w1 |
| ; CHECK-COMMON-NEXT: mov w20, w0 |
| ; CHECK-COMMON-NEXT: mov w21, wzr |
| ; CHECK-COMMON-NEXT: sub x22, x29, #16 |
| ; CHECK-COMMON-NEXT: b .LBB10_4 |
| ; CHECK-COMMON-NEXT: .LBB10_2: // %use_shared |
| ; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1 |
| ; CHECK-COMMON-NEXT: bl shared_za_call |
| ; CHECK-COMMON-NEXT: .LBB10_3: // %latch |
| ; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1 |
| ; CHECK-COMMON-NEXT: add w21, w21, #1 |
| ; CHECK-COMMON-NEXT: cmp w21, w20 |
| ; CHECK-COMMON-NEXT: b.ge .LBB10_8 |
| ; CHECK-COMMON-NEXT: .LBB10_4: // %loop |
| ; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-COMMON-NEXT: tbnz w19, #0, .LBB10_2 |
| ; CHECK-COMMON-NEXT: // %bb.5: // %use_private |
| ; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1 |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x22 |
| ; CHECK-COMMON-NEXT: bl private_za_call |
| ; CHECK-COMMON-NEXT: smstart za |
| ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-COMMON-NEXT: sub x0, x29, #16 |
| ; CHECK-COMMON-NEXT: cbnz x8, .LBB10_7 |
| ; CHECK-COMMON-NEXT: // %bb.6: // %use_private |
| ; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1 |
| ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-COMMON-NEXT: .LBB10_7: // %use_private |
| ; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1 |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-COMMON-NEXT: b .LBB10_3 |
| ; CHECK-COMMON-NEXT: .LBB10_8: // %exit |
| ; CHECK-COMMON-NEXT: mov sp, x29 |
| ; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| entry: |
| %cmp = icmp sgt i32 %n, 0 |
| br i1 %cmp, label %loop, label %exit |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %inc, %latch ] |
| br i1 %cond, label %use_shared, label %use_private |
| |
| use_shared: |
| call void @shared_za_call() |
| br label %latch |
| |
| use_private: |
| call void @private_za_call() |
| br label %latch |
| |
| latch: |
| %inc = add i32 %iv, 1 |
| %cmp2 = icmp slt i32 %inc, %n |
| br i1 %cmp2, label %loop, label %exit |
| |
| exit: |
| ret void |
| } |
| |
;; Test: a self-loop with two distinct entry edges (%entry and %init) around a
;; private-ZA call. The two lowerings are expected to differ: the current
;; lowering arms TPIDR2_EL0 and restores ZA on every loop iteration, whereas
;; the new lowering (-aarch64-new-sme-abi RUN line) arms TPIDR2_EL0 once in
;; the preheader and performs the conditional restore once after the loop.
define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: loop_with_external_entry:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB11_2
; CHECK-NEXT: // %bb.1: // %init
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: .LBB11_2: // %loop.preheader
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB11_4
; CHECK-NEXT: .LBB11_3: // %loop
; CHECK-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: tbz w19, #0, .LBB11_6
; CHECK-NEXT: .LBB11_4: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB11_3
; CHECK-NEXT: // %bb.5: // %loop
; CHECK-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB11_3
; CHECK-NEXT: .LBB11_6: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB11_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
; Two entry edges into %loop: directly from %entry, or via %init.
br i1 %c1, label %init, label %loop

init:
; Shared-ZA call before entering the loop on this path.
call void @shared_za_call()
br label %loop

loop:
; Self-loop around a private-ZA call; %c2 controls iteration.
call void @private_za_call()
br i1 %c2, label %loop, label %exit

exit:
ret void
}