; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING
declare void @private_za_call()
declare void @shared_za_call() "aarch64_inout_za"
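
; Test a loop that only makes private-ZA calls. In the current lowering,
; TPIDR2_EL0 is written and the restore sequence runs on every iteration;
; with -aarch64-new-sme-abi the lazy save is set up once on entry and ZA is
; restored once on exit.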
define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_2: // %loop
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: subs w19, w19, #1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b.eq .LBB0_5
; CHECK-NEXT: .LBB0_3: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB0_2
; CHECK-NEXT: // %bb.4: // %loop
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_5: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: private_za_loop:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: cmp w0, #1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: b.lt .LBB0_3
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: .LBB0_2: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: subs w19, w19, #1
; CHECK-NEWLOWERING-NEXT: b.ne .LBB0_2
; CHECK-NEWLOWERING-NEXT: .LBB0_3: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB0_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB0_5: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
%cmpgt = icmp sgt i32 %n, 0
br i1 %cmpgt, label %loop, label %exit
loop:
%iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
tail call void @private_za_call()
%next_iv = add nuw nsw i32 %iv, 1
%cmpeq = icmp eq i32 %next_iv, %n
br i1 %cmpeq, label %exit, label %loop
exit:
ret void
}
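
; Like the previous test, but ZA is live both before and after the loop
; (via shared_za_call), so the lazy save still sits inside the loop in both
; lowerings.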
; FIXME: In the new lowering we could weight edges to avoid doing the lazy save in the loop.
define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop_active_entry_and_exit:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: cmp w19, #1
; CHECK-NEXT: b.lt .LBB1_5
; CHECK-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_2: // %loop
; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT: subs w19, w19, #1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b.eq .LBB1_5
; CHECK-NEXT: .LBB1_3: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.4: // %loop
; CHECK-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_5: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: private_za_loop_active_entry_and_exit:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: cmp w19, #1
; CHECK-NEWLOWERING-NEXT: b.lt .LBB1_5
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT: b .LBB1_3
; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: cbz w19, .LBB1_5
; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: sub w19, w19, #1
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_2
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: b .LBB1_2
; CHECK-NEWLOWERING-NEXT: .LBB1_5: // %exit
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
entry:
%cmpgt = icmp sgt i32 %n, 0
tail call void @shared_za_call()
br i1 %cmpgt, label %loop, label %exit
loop:
%iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
tail call void @private_za_call()
%next_iv = add nuw nsw i32 %iv, 1
%cmpeq = icmp eq i32 %next_iv, %n
br i1 %cmpeq, label %exit, label %loop
exit:
tail call void @shared_za_call()
ret void
}
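
; A loop containing only shared-ZA calls needs no lazy save, so neither
; lowering emits any TPIDR2_EL0 traffic.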
define void @shared_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: shared_za_loop:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: cmp w0, #1
; CHECK-COMMON-NEXT: b.lt .LBB2_4
; CHECK-COMMON-NEXT: // %bb.1: // %loop.preheader
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov w19, w0
; CHECK-COMMON-NEXT: .LBB2_2: // %loop
; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: subs w19, w19, #1
; CHECK-COMMON-NEXT: b.ne .LBB2_2
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: .LBB2_4: // %exit
; CHECK-COMMON-NEXT: ret
entry:
%cmpgt = icmp sgt i32 %n, 0
br i1 %cmpgt, label %loop, label %exit
loop:
%iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
tail call void @shared_za_call()
%next_iv = add nuw nsw i32 %iv, 1
%cmpeq = icmp eq i32 %next_iv, %n
br i1 %cmpeq, label %exit, label %loop
exit:
ret void
}
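
; A conditional private-ZA call merging into a shared-ZA tail call.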
; FIXME: The codegen for this case could be improved (by tuning weights).
; Here the ZA save has been hoisted out of the conditional, but it would be
; better to sink it.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_private_za_call:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB3_4
; CHECK-NEXT: // %bb.1: // %private_za_call
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB3_3
; CHECK-NEXT: // %bb.2: // %private_za_call
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB3_3: // %private_za_call
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB3_4: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
br i1 %cond, label %private_za_call, label %exit
private_za_call:
tail call void @private_za_call()
br label %exit
exit:
tail call void @shared_za_call()
ret void
}
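
; A loop that alternates shared-ZA and private-ZA calls, so the lazy save
; must be set up and cleared on every iteration in both lowerings.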
define void @mixed_shared_private_za_loop(ptr %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: mixed_shared_private_za_loop:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: b .LBB4_2
; CHECK-NEXT: .LBB4_1: // %loop
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: ldrb w8, [x19]
; CHECK-NEXT: tbz w8, #0, .LBB4_4
; CHECK-NEXT: .LBB4_2: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB4_1
; CHECK-NEXT: // %bb.3: // %loop
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB4_1
; CHECK-NEXT: .LBB4_4: // %exit
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: mixed_shared_private_za_loop:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov x19, x0
; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: b .LBB4_2
; CHECK-NEWLOWERING-NEXT: .LBB4_1: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: tbz w8, #0, .LBB4_4
; CHECK-NEWLOWERING-NEXT: .LBB4_2: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: ldrb w8, [x19]
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB4_1
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %loop
; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: b .LBB4_1
; CHECK-NEWLOWERING-NEXT: .LBB4_4: // %exit
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
br label %loop
loop:
call void @shared_za_call()
call void @private_za_call()
br label %latch
latch:
%bool = load volatile i8, ptr %cond, align 1
%trunc = trunc i8 %bool to i1
br i1 %trunc, label %loop, label %exit
exit:
call void @shared_za_call()
ret void
}
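
; A conditional private-ZA call followed by an unconditional one. The new
; lowering sets up the lazy save once before the branch, so a single restore
; after the second call covers both paths; the current lowering emits a full
; save/restore sequence around each call.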
define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_clobber_followed_by_clobber:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.1: // %cond_clobber
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_3
; CHECK-NEXT: // %bb.2: // %cond_clobber
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_3: // %cond_clobber
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB5_4: // %exit
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_6
; CHECK-NEXT: // %bb.5: // %exit
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_6: // %exit
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_clobber_followed_by_clobber:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w0
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %cond_clobber
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB5_2: // %exit
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB5_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB5_4: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: b shared_za_call
tail call void @shared_za_call()
br i1 %cond, label %cond_clobber, label %exit
cond_clobber:
tail call void @private_za_call()
br label %exit
exit:
tail call void @private_za_call()
tail call void @shared_za_call()
ret void
}
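
; ZA is only used on one path, so both lowerings confine the lazy save and
; restore to the conditional block.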
define void @conditionally_use_za(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: conditionally_use_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: tbz w0, #0, .LBB6_4
; CHECK-COMMON-NEXT: // %bb.1: // %use_za
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_call
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB6_3
; CHECK-COMMON-NEXT: // %bb.2: // %use_za
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB6_3: // %use_za
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: .LBB6_4: // %exit
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
br i1 %cond, label %use_za, label %exit
use_za:
tail call void @shared_za_call()
tail call void @private_za_call()
br label %exit
exit:
ret void
}
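
; A diamond where one side uses shared ZA and the other clobbers it, merging
; into a shared-ZA call. The save/restore stays on the %else path in both
; lowerings.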
define void @diamond_mixed_za_merge_shared(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: diamond_mixed_za_merge_shared:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: tbz w0, #0, .LBB7_2
; CHECK-COMMON-NEXT: // %bb.1: // %then
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: b .LBB7_5
; CHECK-COMMON-NEXT: .LBB7_2: // %else
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_call
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB7_4
; CHECK-COMMON-NEXT: // %bb.3: // %else
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB7_4: // %else
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: .LBB7_5: // %merge_shared
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
br i1 %cond, label %then, label %else
then:
call void @shared_za_call()
br label %merge_shared
else:
call void @private_za_call()
br label %merge_shared
merge_shared:
call void @shared_za_call()
ret void
}
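
; As above, but the merge block makes a private-ZA call. The new lowering
; sets up the lazy save on both incoming edges and defers the restore to a
; single sequence after the call in the merge block.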
define void @diamond_mixed_za_merge_private(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: diamond_mixed_za_merge_private:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB8_2
; CHECK-NEXT: // %bb.1: // %then
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: b .LBB8_5
; CHECK-NEXT: .LBB8_2: // %else
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB8_4
; CHECK-NEXT: // %bb.3: // %else
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB8_4: // %else
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB8_5: // %merge_private_za
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB8_7
; CHECK-NEXT: // %bb.6: // %merge_private_za
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB8_7: // %merge_private_za
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: diamond_mixed_za_merge_private:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB8_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %then
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: b .LBB8_3
; CHECK-NEWLOWERING-NEXT: .LBB8_2: // %else
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: .LBB8_3: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB8_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB8_5: // %merge_private_za
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
br i1 %cond, label %then, label %else
then:
call void @shared_za_call()
br label %merge_private_za
else:
call void @private_za_call()
br label %merge_private_za
merge_private_za:
call void @private_za_call()
ret void
}
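
; Mixed shared/private paths joined through a critical edge. Both lowerings
; currently produce the same placement of saves and restores.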
define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: critical_edge_mixed_za:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB9_5
; CHECK-NEXT: // %bb.1: // %shared_path
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: tbz w19, #0, .LBB9_8
; CHECK-NEXT: .LBB9_2: // %exit_private
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB9_4
; CHECK-NEXT: // %bb.3: // %exit_private
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB9_4: // %exit_private
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: b .LBB9_9
; CHECK-NEXT: .LBB9_5: // %private_path
; CHECK-NEXT: sub x8, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB9_7
; CHECK-NEXT: // %bb.6: // %private_path
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB9_7: // %private_path
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: tbnz w19, #0, .LBB9_2
; CHECK-NEXT: .LBB9_8: // %exit_shared
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: .LBB9_9: // %common.ret
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: critical_edge_mixed_za:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB9_5
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %shared_path
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB9_8
; CHECK-NEWLOWERING-NEXT: .LBB9_2: // %exit_private
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_4
; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit_private
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_4: // %exit_private
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: b .LBB9_9
; CHECK-NEWLOWERING-NEXT: .LBB9_5: // %private_path
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_7
; CHECK-NEWLOWERING-NEXT: // %bb.6: // %private_path
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_7: // %private_path
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB9_2
; CHECK-NEWLOWERING-NEXT: .LBB9_8: // %exit_shared
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB9_9: // %common.ret
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
br i1 %c1, label %shared_path, label %private_path
shared_path:
call void @shared_za_call()
br label %merge
private_path:
call void @private_za_call()
br label %merge
merge:
br i1 %c2, label %exit_private, label %exit_shared
exit_private:
call void @private_za_call()
ret void
exit_shared:
call void @shared_za_call()
ret void
}
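
; A conditional inside a loop choosing between a shared-ZA and a private-ZA
; call; the save/restore is only emitted on the private path.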
define void @nested_cond_in_loop(i32 %n, i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: nested_cond_in_loop:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: cmp w0, #1
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: b.lt .LBB10_8
; CHECK-COMMON-NEXT: // %bb.1: // %loop.preheader
; CHECK-COMMON-NEXT: mov w19, w1
; CHECK-COMMON-NEXT: mov w20, w0
; CHECK-COMMON-NEXT: mov w21, wzr
; CHECK-COMMON-NEXT: sub x22, x29, #16
; CHECK-COMMON-NEXT: b .LBB10_4
; CHECK-COMMON-NEXT: .LBB10_2: // %use_shared
; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT: bl shared_za_call
; CHECK-COMMON-NEXT: .LBB10_3: // %latch
; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT: add w21, w21, #1
; CHECK-COMMON-NEXT: cmp w21, w20
; CHECK-COMMON-NEXT: b.ge .LBB10_8
; CHECK-COMMON-NEXT: .LBB10_4: // %loop
; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-COMMON-NEXT: tbnz w19, #0, .LBB10_2
; CHECK-COMMON-NEXT: // %bb.5: // %use_private
; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x22
; CHECK-COMMON-NEXT: bl private_za_call
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_7
; CHECK-COMMON-NEXT: // %bb.6: // %use_private
; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB10_7: // %use_private
; CHECK-COMMON-NEXT: // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: b .LBB10_3
; CHECK-COMMON-NEXT: .LBB10_8: // %exit
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %loop, label %exit
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %latch ]
br i1 %cond, label %use_shared, label %use_private
use_shared:
call void @shared_za_call()
br label %latch
use_private:
call void @private_za_call()
br label %latch
latch:
%inc = add i32 %iv, 1
%cmp2 = icmp slt i32 %inc, %n
br i1 %cmp2, label %loop, label %exit
exit:
ret void
}
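
; A loop with two entry edges (directly from %entry and via %init). The new
; lowering hoists the lazy save into the preheader and restores ZA once in
; the exit block; the current lowering saves and restores on every iteration.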
define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: loop_with_external_entry:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: tbz w0, #0, .LBB11_2
; CHECK-NEXT: // %bb.1: // %init
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: .LBB11_2: // %loop.preheader
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: b .LBB11_4
; CHECK-NEXT: .LBB11_3: // %loop
; CHECK-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: tbz w19, #0, .LBB11_6
; CHECK-NEXT: .LBB11_4: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_call
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB11_3
; CHECK-NEXT: // %bb.5: // %loop
; CHECK-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: b .LBB11_3
; CHECK-NEXT: .LBB11_6: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: mov w19, w1
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB11_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: bl private_za_call
; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
br i1 %c1, label %init, label %loop
init:
call void @shared_za_call()
br label %loop
loop:
call void @private_za_call()
br i1 %c2, label %loop, label %exit
exit:
ret void
}