| // z_AIX_asm.S: - microtasking routines specifically |
| // written for Power platforms running AIX OS |
| |
| // |
| ////===----------------------------------------------------------------------===// |
| //// |
| //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| //// See https://llvm.org/LICENSE.txt for license information. |
| //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| //// |
| ////===----------------------------------------------------------------------===// |
| // |
| |
| // ----------------------------------------------------------------------- |
| // macros |
| // ----------------------------------------------------------------------- |
| |
| #include "kmp_config.h" |
| |
| #if KMP_OS_AIX |
| //------------------------------------------------------------------------ |
| // int |
| // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), |
| // int gtid, int tid, |
| // int argc, void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)( & gtid, & tid, p_argv[0], ... ); |
| // |
| // // FIXME: This is done at call-site and can be removed here. |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = 0; |
| // #endif |
| // |
| // return 1; |
| // } |
| // |
| // parameters: |
| // r3: pkfn |
| // r4: gtid |
| // r5: tid |
| // r6: argc |
| // r7: p_argv |
| // r8: exit_frame_ptr (&exit_frame; passed only when OMPT_SUPPORT is enabled) |
| // |
| // return: r3 (always 1/TRUE) |
| // |
| |
| #if KMP_ARCH_PPC64_XCOFF |
| |
| // 64-bit XCOFF variant of __kmp_invoke_microtask. |
| // The [DS] csect holds the function descriptor (entry-point address, TOC anchor, |
| // and a zero third doubleword); the code itself is placed in .text[PR] under the |
| // dot-prefixed entry label. |
| .globl __kmp_invoke_microtask[DS] |
| .globl .__kmp_invoke_microtask |
| .align 4 |
| .csect __kmp_invoke_microtask[DS],3 |
| .vbyte 8, .__kmp_invoke_microtask |
| .vbyte 8, TOC[TC0] |
| .vbyte 8, 0 |
| .csect .text[PR],2 |
| .machine "pwr7" |
| .__kmp_invoke_microtask: |
| |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| |
| // We need to allocate a stack frame large enough to hold all of the parameters |
| // on the stack for the microtask plus what this function needs. That's 48 |
| // bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for |
| // the parameters to the microtask (gtid, tid, argc elements of p_argv), |
| // plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31. |
| // With OMP-T support, we need an additional 8 bytes to save r30 to hold |
| // a copy of r8. |
| // Stack offsets relative to the stack pointer on entry (kept in r31 below): |
| // r31: -8, r30: -16, gtid: -20, tid: -24 |
| |
| mflr 0 # r0 = LR (return address) |
| std 31, -8(1) # Save r31 to the stack |
| std 0, 16(1) # Save LR to the linkage area |
| |
| // This is unusual because normally we'd set r31 equal to r1 after the stack |
| // frame is established. In this case, however, we need to dynamically compute |
| // the stack frame size, and so we keep a direct copy of r1 to access our |
| // register save areas and restore the r1 value before returning. |
| mr 31, 1 # r31 = stack pointer as it was on entry |
| |
| // Compute the size of the "argc" portion of the parameter save area. |
| // The parameter save area is always at least 64 bytes long (i.e. 8 regs) |
| // The microtask has (2 + argc) parameters, so if argc <= 6, we need |
| // to allocate 8*6 bytes, not 8*argc. |
| li 0, 6 # r0 = 6 (lower bound for the slot count) |
| cmpwi 0, 6, 6 # CR0 = signed compare of argc (r6) with 6 |
| iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
| sldi 0, 0, 3 # r0 = 8 * max(argc, 6) |
| |
| // Compute the size necessary for the local stack frame. |
| // 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) + |
| // 8 (parameter gtid) + 8 (parameter tid) |
| li 12, 88 |
| add 12, 0, 12 # r12 = 88 + 8 * max(argc, 6) |
| neg 12, 12 # r12 = -(frame size), ready to be added to r1 |
| |
| // We need to make sure that the stack frame stays aligned (to 16 bytes). |
| // Masking the negative size with -16 clears its low four bits, which can only |
| // make the frame larger, never smaller. |
| li 0, -16 |
| and 12, 0, 12 |
| |
| // Establish the local stack frame. |
| // stdux stores the old r1 at r1 + r12 (the back chain) and leaves r1 pointing there. |
| stdux 1, 1, 12 |
| |
| #if OMPT_SUPPORT |
| std 30, -16(31) # Save r30 to the stack |
| std 1, 0(8) # *exit_frame_ptr = new stack pointer |
| mr 30, 8 # Keep exit_frame_ptr in r30; r8 is reused for p_argv[3] below |
| #endif |
| |
| // Store gtid and tid to the stack because they're passed by reference to the microtask. |
| stw 4, -20(31) # Save gtid to the stack |
| stw 5, -24(31) # Save tid to the stack |
| |
| // Load up to six p_argv entries into r5..r10, stopping as soon as argc is |
| // exhausted. r3 and r4 are filled with &gtid and &tid at .Lcall. |
| mr 12, 6 # r12 = argc |
| mr 4, 7 # r4 = p_argv |
| |
| cmpwi 0, 12, 1 |
| blt 0, .Lcall # if (argc < 1) goto .Lcall |
| |
| ld 5, 0(4) # r5 = p_argv[0] |
| |
| cmpwi 0, 12, 2 |
| blt 0, .Lcall # if (argc < 2) goto .Lcall |
| |
| ld 6, 8(4) # r6 = p_argv[1] |
| |
| cmpwi 0, 12, 3 |
| blt 0, .Lcall # if (argc < 3) goto .Lcall |
| |
| ld 7, 16(4) # r7 = p_argv[2] |
| |
| cmpwi 0, 12, 4 |
| blt 0, .Lcall # if (argc < 4) goto .Lcall |
| |
| ld 8, 24(4) # r8 = p_argv[3] |
| |
| cmpwi 0, 12, 5 |
| blt 0, .Lcall # if (argc < 5) goto .Lcall |
| |
| ld 9, 32(4) # r9 = p_argv[4] |
| |
| cmpwi 0, 12, 6 |
| blt 0, .Lcall # if (argc < 6) goto .Lcall |
| |
| ld 10, 40(4) # r10 = p_argv[5] |
| |
| cmpwi 0, 12, 7 |
| blt 0, .Lcall # if (argc < 7) goto .Lcall |
| |
| // There are more than 6 microtask parameters, so we need to store the |
| // remainder to the stack. |
| addi 12, 12, -6 # argc -= 6 |
| mtctr 12 # CTR = number of arguments that go on the stack |
| |
| // These are set to 8 bytes before the first desired store address (we're using |
| // pre-increment loads and stores in the loop below). The parameter save area |
| // for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64. |
| addi 4, 4, 40 # r4 = &p_argv[5] |
| # (p_argv[0..5] are already in registers; the first ldu below reads p_argv[6]) |
| addi 12, 1, 104 # r12 = stack offset (112 - 8) |
| |
| .Lnext: |
| ldu 0, 8(4) # r0 = next p_argv entry; r4 advances by 8 |
| stdu 0, 8(12) # Store it to the next parameter slot; r12 advances by 8 |
| bdnz .Lnext # Decrement CTR and loop while it is non-zero |
| |
| .Lcall: |
| std 2, 40(1) # Save the TOC pointer to the linkage area |
| // Load the actual function address from the function descriptor. |
| // r3 still holds pkfn here; it is overwritten with &gtid only afterwards. |
| ld 12, 0(3) # Function address |
| ld 2, 8(3) # TOC pointer |
| ld 11, 16(3) # Environment pointer |
| |
| addi 3, 31, -20 # r3 = &gtid |
| addi 4, 31, -24 # r4 = &tid |
| |
| mtctr 12 # CTR = function address |
| bctrl # Branch to CTR |
| ld 2, 40(1) # Restore TOC pointer from linkage area |
| |
| #if OMPT_SUPPORT |
| li 3, 0 |
| std 3, 0(30) # *exit_frame_ptr = 0 (r30 holds the saved copy of r8) |
| #endif |
| |
| li 3, 1 # Return value: always 1 |
| |
| #if OMPT_SUPPORT |
| ld 30, -16(31) # Restore r30 from the saved value on the stack |
| #endif |
| |
| mr 1, 31 # Drop the frame: r1 = stack pointer as it was on entry |
| ld 31, -8(1) # Restore r31 from the saved value on the stack |
| ld 0, 16(1) |
| mtlr 0 # Restore LR from the linkage area |
| blr # Branch to LR |
| |
| #else // KMP_ARCH_PPC_XCOFF |
| |
| // 32-bit XCOFF variant of __kmp_invoke_microtask. |
| // The [DS] csect holds the function descriptor (entry-point address, TOC anchor, |
| // and a zero third word); the code itself is placed in .text[PR] under the |
| // dot-prefixed entry label. |
| .globl __kmp_invoke_microtask[DS] |
| .globl .__kmp_invoke_microtask |
| .align 4 |
| .csect __kmp_invoke_microtask[DS],2 |
| .vbyte 4, .__kmp_invoke_microtask |
| .vbyte 4, TOC[TC0] |
| .vbyte 4, 0 |
| .csect .text[PR],2 |
| .machine "pwr7" |
| .__kmp_invoke_microtask: |
| |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| |
| // We need to allocate a stack frame large enough to hold all of the parameters |
| // on the stack for the microtask plus what this function needs. That's 24 |
| // bytes under the XCOFF ABI, plus max(32, 4*(2 + argc)) for |
| // the parameters to the microtask (gtid, tid, argc elements of p_argv), |
| // plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31. |
| // With OMP-T support, we need an additional 4 bytes to save r30 to hold |
| // a copy of r8. |
| // Stack offsets relative to the stack pointer on entry (kept in r31 below): |
| // r31: -4, r30: -8, gtid: -12, tid: -16 |
| |
| mflr 0 # r0 = LR (return address) |
| stw 31, -4(1) # Save r31 to the stack |
| stw 0, 8(1) # Save LR to the linkage area |
| |
| // This is unusual because normally we'd set r31 equal to r1 after the stack |
| // frame is established. In this case, however, we need to dynamically compute |
| // the stack frame size, and so we keep a direct copy of r1 to access our |
| // register save areas and restore the r1 value before returning. |
| mr 31, 1 # r31 = stack pointer as it was on entry |
| |
| // Compute the size of the "argc" portion of the parameter save area. |
| // The parameter save area is always at least 32 bytes long (i.e. 8 regs) |
| // The microtask has (2 + argc) parameters, so if argc <= 6, we need |
| // to allocate 4*6 bytes, not 4*argc. |
| li 0, 6 # r0 = 6 (lower bound for the slot count) |
| cmpwi 0, 6, 6 # CR0 = signed compare of argc (r6) with 6 |
| iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
| slwi 0, 0, 2 # r0 = 4 * max(argc, 6) |
| |
| // Compute the size necessary for the local stack frame. |
| // 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) + |
| // 4 (parameter gtid) + 4 (parameter tid) |
| // NOTE(review): the header comment above gives 24 bytes for the fixed area, which |
| // would make this sum 48; 56 reserves 8 bytes more, which looks harmless -- confirm |
| // before changing the constant. |
| li 12, 56 |
| add 12, 0, 12 # r12 = 56 + 4 * max(argc, 6) |
| neg 12, 12 # r12 = -(frame size), ready to be added to r1 |
| |
| // We need to make sure that the stack frame stays aligned (to 16 bytes). |
| // Masking the negative size with -16 clears its low four bits, which can only |
| // make the frame larger, never smaller. |
| li 0, -16 |
| and 12, 0, 12 |
| |
| // Establish the local stack frame. |
| // stwux stores the old r1 at r1 + r12 (the back chain) and leaves r1 pointing there. |
| stwux 1, 1, 12 |
| |
| #if OMPT_SUPPORT |
| stw 30, -8(31) # Save r30 to the stack |
| stw 1, 0(8) # *exit_frame_ptr = new stack pointer |
| mr 30, 8 # Keep exit_frame_ptr in r30; r8 is reused for p_argv[3] below |
| #endif |
| |
| // Store gtid and tid to the stack because they're passed by reference to the microtask. |
| stw 4, -12(31) # Save gtid to the stack |
| stw 5, -16(31) # Save tid to the stack |
| |
| // Load up to six p_argv entries into r5..r10, stopping as soon as argc is |
| // exhausted. r3 and r4 are filled with &gtid and &tid at .Lcall. |
| mr 12, 6 # r12 = argc |
| mr 4, 7 # r4 = p_argv |
| |
| cmpwi 0, 12, 1 |
| blt 0, .Lcall # if (argc < 1) goto .Lcall |
| |
| lwz 5, 0(4) # r5 = p_argv[0] |
| |
| cmpwi 0, 12, 2 |
| blt 0, .Lcall # if (argc < 2) goto .Lcall |
| |
| lwz 6, 4(4) # r6 = p_argv[1] |
| |
| cmpwi 0, 12, 3 |
| blt 0, .Lcall # if (argc < 3) goto .Lcall |
| |
| lwz 7, 8(4) # r7 = p_argv[2] |
| |
| cmpwi 0, 12, 4 |
| blt 0, .Lcall # if (argc < 4) goto .Lcall |
| |
| lwz 8, 12(4) # r8 = p_argv[3] |
| |
| cmpwi 0, 12, 5 |
| blt 0, .Lcall # if (argc < 5) goto .Lcall |
| |
| lwz 9, 16(4) # r9 = p_argv[4] |
| |
| cmpwi 0, 12, 6 |
| blt 0, .Lcall # if (argc < 6) goto .Lcall |
| |
| lwz 10, 20(4) # r10 = p_argv[5] |
| |
| cmpwi 0, 12, 7 |
| blt 0, .Lcall # if (argc < 7) goto .Lcall |
| |
| // There are more than 6 microtask parameters, so we need to store the |
| // remainder to the stack. |
| addi 12, 12, -6 # argc -= 6 |
| mtctr 12 # CTR = number of arguments that go on the stack |
| |
| // These are set to 4 bytes before the first desired store address (we're using |
| // pre-increment loads and stores in the loop below). The parameter save area |
| // for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF. |
| addi 4, 4, 20 # r4 = &p_argv[5] |
| # (p_argv[0..5] are already in registers; the first lwzu below reads p_argv[6]) |
| addi 12, 1, 52 # r12 = stack offset (56 - 4) |
| |
| .Lnext: |
| lwzu 0, 4(4) # r0 = next p_argv entry; r4 advances by 4 |
| stwu 0, 4(12) # Store it to the next parameter slot; r12 advances by 4 |
| bdnz .Lnext # Decrement CTR and loop while it is non-zero |
| |
| .Lcall: |
| stw 2, 20(1) # Save the TOC pointer to the linkage area |
| // Load the actual function address from the function descriptor. |
| // r3 still holds pkfn here; it is overwritten with &gtid only afterwards. |
| lwz 12, 0(3) # Function address |
| lwz 2, 4(3) # TOC pointer |
| lwz 11, 8(3) # Environment pointer |
| |
| addi 3, 31, -12 # r3 = &gtid |
| addi 4, 31, -16 # r4 = &tid |
| |
| mtctr 12 # CTR = function address |
| bctrl # Branch to CTR |
| lwz 2, 20(1) # Restore TOC pointer from linkage area |
| |
| #if OMPT_SUPPORT |
| li 3, 0 |
| stw 3, 0(30) # *exit_frame_ptr = 0 (r30 holds the saved copy of r8) |
| #endif |
| |
| li 3, 1 # Return value: always 1 |
| |
| #if OMPT_SUPPORT |
| lwz 30, -8(31) # Restore r30 from the saved value on the stack |
| #endif |
| |
| mr 1, 31 # Drop the frame: r1 = stack pointer as it was on entry |
| lwz 31, -4(1) # Restore r31 from the saved value on the stack |
| lwz 0, 8(1) |
| mtlr 0 # Restore LR from the linkage area |
| blr # Branch to LR |
| |
| #endif // KMP_ARCH_PPC64_XCOFF |
| |
| // Traceback table for __kmp_invoke_microtask. It is emitted after the |
| // architecture #if/#else/#endif, so the same table follows whichever variant |
| // of the function body was assembled. With OMPT_SUPPORT the function saves one |
| // more GPR (r30) and takes a sixth fixed parameter (exit_frame_ptr). |
| .Lfunc_end0: |
| .vbyte 4, 0x00000000 # Traceback table begin |
| .byte 0x00 # Version = 0 |
| .byte 0x09 # Language = CPlusPlus |
| .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue |
| # +HasTraceBackTableOffset, -IsInternalProcedure |
| # -HasControlledStorage, -IsTOCless |
| # -IsFloatingPointPresent |
| # -IsFloatingPointOperationLogOrAbortEnabled |
| .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed |
| # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved |
| .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 |
| #if OMPT_SUPPORT |
| .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2 |
| .byte 0x06 # NumberOfFixedParms = 6 |
| #else |
| .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 |
| .byte 0x05 # NumberOfFixedParms = 5 |
| #endif |
| .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack |
| .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i |
| .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size (entry label to start of this table) |
| .vbyte 2, 0x0016 # Function name len = 22 |
| .byte "__kmp_invoke_microtask" # Function Name |
| .byte 0x1f # AllocaRegister = 31 (r31 holds the entry stack pointer in the function body) |
| # -- End function |
| |
| // -- End __kmp_invoke_microtask |
| |
| // Support for unnamed common blocks. |
| // .gomp_critical_user_ is declared as a 32-byte common block, and |
| // __kmp_unnamed_critical_addr is an exported read-write csect containing a single |
| // pointer to it. |
| // NOTE(review): the trailing 3 on .comm and the 3 / 2 on .csect are presumably |
| // log2 alignments (8 bytes, and 8 vs 4 bytes to match the pointer size) -- confirm |
| // against the assembler reference. |
| |
| .comm .gomp_critical_user_, 32, 3 |
| #if KMP_ARCH_PPC64_XCOFF |
| .csect __kmp_unnamed_critical_addr[RW],3 |
| #else |
| .csect __kmp_unnamed_critical_addr[RW],2 |
| #endif |
| .globl __kmp_unnamed_critical_addr[RW] |
| .ptr .gomp_critical_user_ |
| |
| // -- End unnamed common block |
| |
| // Presumably declares the TOC anchor that the function descriptors reference as |
| // TOC[TC0]; no TOC entries are defined after it here. |
| .toc |
| |
| #endif // KMP_OS_AIX |