|  | //  z_Linux_asm.S:  - microtasking routines specifically | 
|  | //                    written for Intel platforms running Linux* OS | 
|  |  | 
|  | // | 
|  | ////===----------------------------------------------------------------------===// | 
|  | //// | 
|  | //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | //// See https://llvm.org/LICENSE.txt for license information. | 
|  | //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | //// | 
|  | ////===----------------------------------------------------------------------===// | 
|  | // | 
|  |  | 
|  | // ----------------------------------------------------------------------- | 
|  | // macros | 
|  | // ----------------------------------------------------------------------- | 
|  |  | 
|  | #include "kmp_config.h" | 
|  |  | 
|  | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 | 
|  |  | 
|  | // Include <cet.h> only for ELF targets built with CET enabled (__CET__), and only | 
|  | // when the preprocessor supports __has_include and the header is present. | 
|  | # if defined(__ELF__) && defined(__CET__) && defined(__has_include) | 
|  | # if __has_include(<cet.h>) | 
|  | # include <cet.h> | 
|  | # endif | 
|  | # endif | 
|  |  | 
|  | // Fallback: if nothing above defined _CET_ENDBR, make it expand to nothing so | 
|  | // the PROC macros can use it unconditionally. | 
|  | # if !defined(_CET_ENDBR) | 
|  | # define _CET_ENDBR | 
|  | # endif | 
|  |  | 
|  | // pause_op: the spin-wait hint emitted by __kmp_x86_pause. | 
|  | # if KMP_MIC | 
|  | // the 'delay r16/r32/r64' should be used instead of the 'pause'. | 
|  | // The delay operation has the effect of removing the current thread from | 
|  | // the round-robin HT mechanism, and therefore speeds up the issue rate of | 
|  | // the other threads on the same core. | 
|  | // | 
|  | // A value of 0 works fine for <= 2 threads per core, but causes the EPCC | 
|  | // barrier time to increase greatly for 3 or more threads per core. | 
|  | // | 
|  | // A value of 100 works pretty well for up to 4 threads per core, but isn't | 
|  | // quite as fast as 0 for 2 threads per core. | 
|  | // | 
|  | // We need to check what happens for oversubscription / > 4 threads per core. | 
|  | // It is possible that we need to pass the delay value in as a parameter | 
|  | // that the caller determines based on the total # threads / # cores. | 
|  | // | 
|  | //.macro pause_op | 
|  | //	mov    $100, %rax | 
|  | //	delay  %rax | 
|  | //.endm | 
|  | // NOTE(review): the macro above is commented out, so pause_op is left | 
|  | // undefined when KMP_MIC is set. | 
|  | # else | 
|  | // Raw byte encoding F3 90 (the PAUSE instruction), emitted as data. | 
|  | #  define pause_op   .byte 0xf3,0x90 | 
|  | # endif // KMP_MIC | 
|  |  | 
|  | // OS-specific helper macros for the x86 / x86_64 routines below: | 
|  | //   KMP_PREFIX_UNDERSCORE(x)  symbol name as seen by the linker | 
|  | //   KMP_LABEL(x)              name of a local label | 
|  | //   ALIGN n                   alignment directive | 
|  | //   PROC name                 function entry: align, export, define the label | 
|  | //   DEBUG_INFO name           function exit bookkeeping | 
|  | //   KMP_CFI_*                 call-frame-information annotations | 
|  | # if KMP_OS_DARWIN | 
|  | #  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols | 
|  | #  define KMP_LABEL(x) L_##x             // form the name of label | 
|  | // The CFI macros are empty on Darwin: no CFI directives are emitted. | 
|  | .macro KMP_CFI_DEF_OFFSET | 
|  | .endmacro | 
|  | .macro KMP_CFI_OFFSET | 
|  | .endmacro | 
|  | .macro KMP_CFI_REGISTER | 
|  | .endmacro | 
|  | .macro KMP_CFI_DEF | 
|  | .endmacro | 
|  | // ALIGN passes its argument ($0) straight to .align. | 
|  | .macro ALIGN | 
|  | .align $0 | 
|  | .endmacro | 
|  | .macro DEBUG_INFO | 
|  | /* Not sure what .size does in icc, not sure if we need to do something | 
|  | similar for OS X*. | 
|  | */ | 
|  | .endmacro | 
|  | // PROC: ALIGN 4, export the underscore-prefixed symbol, define it, and emit | 
|  | // _CET_ENDBR as the first thing at the entry point. | 
|  | .macro PROC | 
|  | ALIGN  4 | 
|  | .globl KMP_PREFIX_UNDERSCORE($0) | 
|  | KMP_PREFIX_UNDERSCORE($0): | 
|  | _CET_ENDBR | 
|  | .endmacro | 
|  | # else // KMP_OS_DARWIN | 
|  | #  define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols | 
|  | // Format labels so that they don't override function names in gdb's backtraces | 
|  | // MIC assembler doesn't accept .L syntax, the L works fine there (as well as | 
|  | // on OS X*) | 
|  | # if KMP_MIC | 
|  | #  define KMP_LABEL(x) L_##x          // local label | 
|  | # else | 
|  | #  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces | 
|  | # endif // KMP_MIC | 
|  | // ALIGN size: "size" is an exponent; the directive emitted is .align 2^size | 
|  | // (so ALIGN 4 emits .align 16). | 
|  | .macro ALIGN size | 
|  | .align 1<<(\size) | 
|  | .endm | 
|  | // DEBUG_INFO proc: closes the CFI region opened by PROC, emits .align 16, | 
|  | // and records the symbol's type and size. | 
|  | .macro DEBUG_INFO proc | 
|  | .cfi_endproc | 
|  | // Not sure why we need .type and .size for the functions | 
|  | .align 16 | 
|  | .type  \proc,@function | 
|  | .size  \proc,.-\proc | 
|  | .endm | 
|  | // PROC proc: ALIGN 4, export and define the symbol, open a CFI region, and | 
|  | // emit _CET_ENDBR at the entry point. | 
|  | .macro PROC proc | 
|  | ALIGN  4 | 
|  | .globl KMP_PREFIX_UNDERSCORE(\proc) | 
|  | KMP_PREFIX_UNDERSCORE(\proc): | 
|  | .cfi_startproc | 
|  | _CET_ENDBR | 
|  | .endm | 
|  | // Thin wrappers around the corresponding .cfi_* directives. | 
|  | .macro KMP_CFI_DEF_OFFSET sz | 
|  | .cfi_def_cfa_offset	\sz | 
|  | .endm | 
|  | .macro KMP_CFI_OFFSET reg, sz | 
|  | .cfi_offset	\reg,\sz | 
|  | .endm | 
|  | .macro KMP_CFI_REGISTER reg | 
|  | .cfi_def_cfa_register	\reg | 
|  | .endm | 
|  | .macro KMP_CFI_DEF reg, sz | 
|  | .cfi_def_cfa	\reg,\sz | 
|  | .endm | 
|  | # endif // KMP_OS_DARWIN | 
|  | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 | 
|  |  | 
|  | #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) | 
|  |  | 
|  | // OS-specific helper macros for the ARM / AArch64 routines: | 
|  | //   KMP_PREFIX_UNDERSCORE(x)  symbol name as seen by the linker | 
|  | //   KMP_LABEL(x)              name of a local label | 
|  | //   ALIGN n                   alignment directive | 
|  | //   PROC name                 function entry: align, export, define the label | 
|  | //   DEBUG_INFO name           function exit bookkeeping | 
|  | # if KMP_OS_DARWIN | 
|  | #  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols | 
|  | #  define KMP_LABEL(x) L_##x             // form the name of label | 
|  |  | 
|  | // ALIGN passes its argument ($0) straight to .align. | 
|  | .macro ALIGN | 
|  | .align $0 | 
|  | .endmacro | 
|  |  | 
|  | .macro DEBUG_INFO | 
|  | /* Not sure what .size does in icc, not sure if we need to do something | 
|  | similar for OS X*. | 
|  | */ | 
|  | .endmacro | 
|  |  | 
|  | // PROC: ALIGN 4, then export and define the underscore-prefixed symbol. | 
|  | .macro PROC | 
|  | ALIGN  4 | 
|  | .globl KMP_PREFIX_UNDERSCORE($0) | 
|  | KMP_PREFIX_UNDERSCORE($0): | 
|  | .endmacro | 
|  | # elif KMP_OS_WINDOWS | 
|  | #  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Windows/ARM64 symbols | 
|  | // Format labels so that they don't override function names in gdb's backtraces | 
|  | #  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces | 
|  |  | 
|  | // ALIGN size: "size" is an exponent; the directive emitted is .align 2^size. | 
|  | .macro ALIGN size | 
|  | .align 1<<(\size) | 
|  | .endm | 
|  |  | 
|  | // DEBUG_INFO proc: only re-aligns; no CFI, .type or .size is emitted here. | 
|  | .macro DEBUG_INFO proc | 
|  | ALIGN 2 | 
|  | .endm | 
|  |  | 
|  | // PROC proc: ALIGN 2, then export and define the symbol (no CFI region). | 
|  | .macro PROC proc | 
|  | ALIGN 2 | 
|  | .globl KMP_PREFIX_UNDERSCORE(\proc) | 
|  | KMP_PREFIX_UNDERSCORE(\proc): | 
|  | .endm | 
|  | # else // KMP_OS_DARWIN || KMP_OS_WINDOWS | 
|  | #  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols | 
|  | // Format labels so that they don't override function names in gdb's backtraces | 
|  | #  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces | 
|  |  | 
|  | // ALIGN size: "size" is an exponent; the directive emitted is .align 2^size. | 
|  | .macro ALIGN size | 
|  | .align 1<<(\size) | 
|  | .endm | 
|  |  | 
|  | // DEBUG_INFO proc: closes the CFI region opened by PROC, re-aligns, and | 
|  | // records the symbol's type and size. 32-bit ARM uses %function because | 
|  | // the @function spelling is not used there. | 
|  | .macro DEBUG_INFO proc | 
|  | .cfi_endproc | 
|  | // Not sure why we need .type and .size for the functions | 
|  | ALIGN 2 | 
|  | #if KMP_ARCH_ARM | 
|  | .type  \proc,%function | 
|  | #else | 
|  | .type  \proc,@function | 
|  | #endif | 
|  | .size  \proc,.-\proc | 
|  | .endm | 
|  |  | 
|  | // PROC proc: ALIGN 2, export and define the symbol, open a CFI region. | 
|  | .macro PROC proc | 
|  | ALIGN 2 | 
|  | .globl KMP_PREFIX_UNDERSCORE(\proc) | 
|  | KMP_PREFIX_UNDERSCORE(\proc): | 
|  | .cfi_startproc | 
|  | .endm | 
|  | # endif // KMP_OS_DARWIN | 
|  |  | 
|  | # if KMP_OS_LINUX | 
|  | // BTI and PAC gnu property note | 
|  | #  define NT_GNU_PROPERTY_TYPE_0 5 | 
|  | #  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 | 
|  | #  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 | 
|  | #  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 | 
|  |  | 
|  | // GNU_PROPERTY(type, value): emits one entry into .note.gnu.property. | 
|  | // Words written, in order: name size (4), descriptor size (16), note type | 
|  | // (NT_GNU_PROPERTY_TYPE_0), the name "GNU\0", then the descriptor: property | 
|  | // type, data size (4), the value, and one word of zero padding. | 
|  | // The previous section is restored by .popsection. | 
|  | #  define GNU_PROPERTY(type, value)                                            \ | 
|  | .pushsection .note.gnu.property, "a";                                        \ | 
|  | .p2align 3;                                                                  \ | 
|  | .word 4;                                                                     \ | 
|  | .word 16;                                                                    \ | 
|  | .word NT_GNU_PROPERTY_TYPE_0;                                                \ | 
|  | .asciz "GNU";                                                                \ | 
|  | .word type;                                                                  \ | 
|  | .word 4;                                                                     \ | 
|  | .word value;                                                                 \ | 
|  | .word 0;                                                                     \ | 
|  | .popsection | 
|  | # endif | 
|  |  | 
|  | // BTI_FLAG / PAC_FLAG: feature bits selected from the compiler's predefined | 
|  | // __ARM_FEATURE_BTI_DEFAULT / __ARM_FEATURE_PAC_DEFAULT macros. | 
|  | // NOTE(review): the GNU_PROPERTY_AARCH64_FEATURE_1_* values are only defined | 
|  | // under KMP_OS_LINUX above; elsewhere they are undefined identifiers, which | 
|  | // evaluate to 0 in the #if tests below -- confirm this is intended. | 
|  | # if defined(__ARM_FEATURE_BTI_DEFAULT) | 
|  | #  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI | 
|  | # else | 
|  | #  define BTI_FLAG 0 | 
|  | # endif | 
|  | # if __ARM_FEATURE_PAC_DEFAULT & 3 | 
|  | #  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC | 
|  | # else | 
|  | #  define PAC_FLAG 0 | 
|  | # endif | 
|  |  | 
|  | // PACBTI_C   : hint placed at function entry (empty when neither feature is on). | 
|  | // PACBTI_RET : hint placed before return (only non-empty when PAC is on). | 
|  | // GNU_PROPERTY_BTI_PAC : emits the property note carrying BTI_FLAG | PAC_FLAG. | 
|  | // Hint numbers are used instead of mnemonics; presumably #25/#29 are | 
|  | // paciasp/autiasp and #34 is "bti c" -- TODO confirm against the Arm ARM. | 
|  | # if (BTI_FLAG | PAC_FLAG) != 0 | 
|  | #  if PAC_FLAG != 0 | 
|  | #   define PACBTI_C hint #25 | 
|  | #   define PACBTI_RET hint #29 | 
|  | #  else | 
|  | #   define PACBTI_C hint #34 | 
|  | #   define PACBTI_RET | 
|  | #  endif | 
|  | #  define GNU_PROPERTY_BTI_PAC \ | 
|  | GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) | 
|  | # else | 
|  | #  define PACBTI_C | 
|  | #  define PACBTI_RET | 
|  | #  define GNU_PROPERTY_BTI_PAC | 
|  | # endif | 
|  | #endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) | 
|  |  | 
|  | // COMMON name, size, align_power: declares a common symbol via .comm. | 
|  | //   Darwin  : the alignment argument is dropped. | 
|  | //   Windows : align_power is passed through unchanged. | 
|  | //   others  : align_power is treated as an exponent and passed as 2^align_power. | 
|  | .macro COMMON name, size, align_power | 
|  | #if KMP_OS_DARWIN | 
|  | .comm \name, \size | 
|  | #elif KMP_OS_WINDOWS | 
|  | .comm \name, \size, \align_power | 
|  | #else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS | 
|  | .comm \name, \size, (1<<(\align_power)) | 
|  | #endif | 
|  | .endm | 
|  |  | 
|  | // ----------------------------------------------------------------------- | 
|  | // data | 
|  | // ----------------------------------------------------------------------- | 
|  |  | 
|  | #ifdef KMP_GOMP_COMPAT | 
|  |  | 
|  | // Support for unnamed common blocks. | 
|  | // | 
|  | // Because the symbol ".gomp_critical_user_" contains a ".", we have to | 
|  | // put this stuff in assembly. | 
|  |  | 
|  | // IA-32: reserve the 32-byte common block ".gomp_critical_user_" and export a | 
|  | // 4-byte pointer to it named __kmp_unnamed_critical_addr. | 
|  | # if KMP_ARCH_X86 | 
|  | #  if KMP_OS_DARWIN | 
|  | .data | 
|  | .comm .gomp_critical_user_,32 | 
|  | .data | 
|  | // Three underscores: the Darwin symbol carries the extra leading underscore. | 
|  | .globl ___kmp_unnamed_critical_addr | 
|  | ___kmp_unnamed_critical_addr: | 
|  | .long .gomp_critical_user_ | 
|  | #  else /* Linux* OS */ | 
|  | .data | 
|  | .comm .gomp_critical_user_,32,8 | 
|  | .data | 
|  | // ALIGN takes an exponent here, so this is .align 16. | 
|  | ALIGN 4 | 
|  | .global __kmp_unnamed_critical_addr | 
|  | __kmp_unnamed_critical_addr: | 
|  | .4byte .gomp_critical_user_ | 
|  | .type __kmp_unnamed_critical_addr,@object | 
|  | .size __kmp_unnamed_critical_addr,4 | 
|  | #  endif /* KMP_OS_DARWIN */ | 
|  | # endif /* KMP_ARCH_X86 */ | 
|  |  | 
|  | // x86_64: reserve the 32-byte common block ".gomp_critical_user_" and export | 
|  | // an 8-byte pointer to it named __kmp_unnamed_critical_addr. | 
|  | # if KMP_ARCH_X86_64 | 
|  | #  if KMP_OS_DARWIN | 
|  | .data | 
|  | .comm .gomp_critical_user_,32 | 
|  | .data | 
|  | // Three underscores: the Darwin symbol carries the extra leading underscore. | 
|  | .globl ___kmp_unnamed_critical_addr | 
|  | ___kmp_unnamed_critical_addr: | 
|  | .quad .gomp_critical_user_ | 
|  | #  else /* Linux* OS */ | 
|  | .data | 
|  | .comm .gomp_critical_user_,32,8 | 
|  | .data | 
|  | // NOTE(review): ALIGN takes an exponent here, so "ALIGN 8" expands to | 
|  | // .align 256 for an 8-byte object; ALIGN 3 may have been intended -- confirm. | 
|  | ALIGN 8 | 
|  | .global __kmp_unnamed_critical_addr | 
|  | __kmp_unnamed_critical_addr: | 
|  | .8byte .gomp_critical_user_ | 
|  | .type __kmp_unnamed_critical_addr,@object | 
|  | .size __kmp_unnamed_critical_addr,8 | 
|  | #  endif /* KMP_OS_DARWIN */ | 
|  | # endif /* KMP_ARCH_X86_64 */ | 
|  |  | 
|  | #endif /* KMP_GOMP_COMPAT */ | 
|  |  | 
|  |  | 
|  | #if KMP_ARCH_X86 && !KMP_ARCH_PPC64 | 
|  |  | 
|  | // ----------------------------------------------------------------------- | 
|  | // microtasking routines specifically written for IA-32 architecture | 
|  | // running Linux* OS | 
|  | // ----------------------------------------------------------------------- | 
|  |  | 
|  | .ident "Intel Corporation" | 
|  | .data | 
|  | ALIGN 4 | 
|  | // void | 
|  | // __kmp_x86_pause( void ); | 
|  | // | 
|  | // Emits pause_op and returns. With the non-MIC definition of pause_op | 
|  | // (.byte 0xf3,0x90) this is a single PAUSE instruction. | 
|  | // No arguments are read and no return value is produced. | 
|  |  | 
|  | .text | 
|  | PROC  __kmp_x86_pause | 
|  |  | 
|  | pause_op | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_x86_pause | 
|  |  | 
|  | # if !KMP_ASM_INTRINS | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // kmp_int32 | 
|  | // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); | 
|  | // | 
|  | // Atomic fetch-and-add: *p += d, returning the value *p held before the add. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	d:	8(%esp) | 
|  | // | 
|  | // return:	%eax (xaddl leaves the previous memory value in its source register) | 
|  |  | 
|  | PROC      __kmp_test_then_add32 | 
|  |  | 
|  | movl      4(%esp), %ecx | 
|  | movl      8(%esp), %eax | 
|  | lock | 
|  | xaddl     %eax,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_test_then_add32 | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed8 | 
|  | // | 
|  | // kmp_int8 | 
|  | // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the byte *p held before. | 
|  | // Only %al is written; the upper bytes of %eax are not set by this routine. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	d:	8(%esp) | 
|  | // | 
|  | // return:	%al | 
|  | PROC  __kmp_xchg_fixed8 | 
|  |  | 
|  | movl      4(%esp), %ecx    // "p" | 
|  | movb      8(%esp), %al	// "d" | 
|  |  | 
|  | lock | 
|  | xchgb     %al,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed8 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed16 | 
|  | // | 
|  | // kmp_int16 | 
|  | // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the 16-bit value *p held before. | 
|  | // Only %ax is written; the upper half of %eax is not set by this routine. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	d:	8(%esp) | 
|  | // return:     %ax | 
|  | PROC  __kmp_xchg_fixed16 | 
|  |  | 
|  | movl      4(%esp), %ecx    // "p" | 
|  | movw      8(%esp), %ax	// "d" | 
|  |  | 
|  | lock | 
|  | xchgw     %ax,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed16 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed32 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the 32-bit value *p held before. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	d:	8(%esp) | 
|  | // | 
|  | // return:	%eax | 
|  | PROC  __kmp_xchg_fixed32 | 
|  |  | 
|  | movl      4(%esp), %ecx    // "p" | 
|  | movl      8(%esp), %eax	// "d" | 
|  |  | 
|  | lock | 
|  | xchgl     %eax,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed32 | 
|  |  | 
|  |  | 
|  | // kmp_int8 | 
|  | // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); | 
|  | // | 
|  | // Atomic byte compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | PROC  __kmp_compare_and_store8 | 
|  |  | 
|  | movb      12(%esp), %dl     // %dl  = "sv" | 
|  | movb      8(%esp), %al      // %al  = "cv" (implicit comparand of cmpxchg) | 
|  | movl      4(%esp), %ecx     // %ecx = "p" | 
|  | lock | 
|  | cmpxchgb  %dl,(%ecx)        // ZF = 1 iff *p matched %al and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result to 32 bits | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store8 | 
|  |  | 
|  | // kmp_int16 | 
|  | // __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); | 
|  | // | 
|  | // Atomic 16-bit compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | PROC  __kmp_compare_and_store16 | 
|  |  | 
|  | movw      12(%esp), %dx     // %dx  = "sv" | 
|  | movw      8(%esp), %ax      // %ax  = "cv" (implicit comparand of cmpxchg) | 
|  | movl      4(%esp), %ecx     // %ecx = "p" | 
|  | lock | 
|  | cmpxchgw  %dx,(%ecx)        // ZF = 1 iff *p matched %ax and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result to 32 bits | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store16 | 
|  |  | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); | 
|  | // | 
|  | // Atomic 32-bit compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | PROC  __kmp_compare_and_store32 | 
|  |  | 
|  | movl      12(%esp), %edx    // %edx = "sv" | 
|  | movl      8(%esp), %eax     // %eax = "cv" (implicit comparand of cmpxchg) | 
|  | movl      4(%esp), %ecx     // %ecx = "p" | 
|  | lock | 
|  | cmpxchgl  %edx,(%ecx)       // ZF = 1 iff *p matched %eax and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result to 32 bits | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store32 | 
|  |  | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); | 
|  | // | 
|  | // Atomic 64-bit compare-and-swap on IA-32 using cmpxchg8b: | 
|  | // if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters (after the frame is set up): | 
|  | // 	p:	8(%ebp) | 
|  | // 	cv:	12(%ebp) low, 16(%ebp) high | 
|  | // 	sv:	20(%ebp) low, 24(%ebp) high | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | // %ebx and %edi are saved and restored around the operation. | 
|  | PROC  __kmp_compare_and_store64 | 
|  |  | 
|  | pushl     %ebp | 
|  | movl      %esp, %ebp | 
|  | pushl     %ebx | 
|  | pushl     %edi | 
|  | movl      24(%ebp), %ecx        // %ecx:%ebx = "sv" (high:low) | 
|  | movl      20(%ebp), %ebx | 
|  | movl      16(%ebp), %edx        // %edx:%eax = "cv" (high:low) | 
|  | movl      12(%ebp), %eax | 
|  | movl      8(%ebp), %edi         // %edi = "p" | 
|  | lock | 
|  | cmpxchg8b (%edi)                // ZF = 1 iff *p matched %edx:%eax and was replaced | 
|  | sete      %al                   // %al = ZF | 
|  | movzbl    %al, %eax             // zero-extend the 0/1 result to 32 bits | 
|  | popl      %edi | 
|  | popl      %ebx | 
|  | leave                           // same as: movl %ebp,%esp ; popl %ebp | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store64 | 
|  |  | 
|  | // kmp_int8 | 
|  | // __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); | 
|  | // | 
|  | // Atomic byte compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%al (left there by cmpxchgb; upper bytes of %eax are not set) | 
|  | PROC  __kmp_compare_and_store_ret8 | 
|  |  | 
|  | movl      4(%esp), %ecx | 
|  | movb      8(%esp), %al | 
|  | movb      12(%esp), %dl | 
|  | lock | 
|  | cmpxchgb  %dl,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret8 | 
|  |  | 
|  | // kmp_int16 | 
|  | // __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, | 
|  | //                               kmp_int16 sv); | 
|  | // | 
|  | // Atomic 16-bit compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%ax (left there by cmpxchgw; upper half of %eax is not set) | 
|  | PROC  __kmp_compare_and_store_ret16 | 
|  |  | 
|  | movl      4(%esp), %ecx | 
|  | movw      8(%esp), %ax | 
|  | movw      12(%esp), %dx | 
|  | lock | 
|  | cmpxchgw  %dx,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret16 | 
|  |  | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, | 
|  | //                               kmp_int32 sv); | 
|  | // | 
|  | // Atomic 32-bit compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	4(%esp) | 
|  | // 	cv:	8(%esp) | 
|  | // 	sv:	12(%esp) | 
|  | // | 
|  | // return:	%eax (left there by cmpxchgl) | 
|  | PROC  __kmp_compare_and_store_ret32 | 
|  |  | 
|  | movl      4(%esp), %ecx | 
|  | movl      8(%esp), %eax | 
|  | movl      12(%esp), %edx | 
|  | lock | 
|  | cmpxchgl  %edx,(%ecx) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret32 | 
|  |  | 
|  | // kmp_int64 | 
|  | // __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, | 
|  | //                               kmp_int64 sv); | 
|  | // | 
|  | // Atomic 64-bit compare-and-swap on IA-32 using cmpxchg8b, returning the | 
|  | // value observed in *p (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters (after the frame is set up): | 
|  | // 	p:	8(%ebp) | 
|  | // 	cv:	12(%ebp) low, 16(%ebp) high | 
|  | // 	sv:	20(%ebp) low, 24(%ebp) high | 
|  | // | 
|  | // return:	%edx:%eax (left there by cmpxchg8b) | 
|  | // %ebx and %edi are saved and restored around the operation. | 
|  | PROC  __kmp_compare_and_store_ret64 | 
|  |  | 
|  | pushl     %ebp | 
|  | movl      %esp, %ebp | 
|  | pushl     %ebx | 
|  | pushl     %edi | 
|  | movl      8(%ebp), %edi | 
|  | movl      12(%ebp), %eax        // "cv" low order word | 
|  | movl      16(%ebp), %edx        // "cv" high order word | 
|  | movl      20(%ebp), %ebx        // "sv" low order word | 
|  | movl      24(%ebp), %ecx        // "sv" high order word | 
|  | lock | 
|  | cmpxchg8b (%edi) | 
|  | popl      %edi | 
|  | popl      %ebx | 
|  | movl      %ebp, %esp | 
|  | popl      %ebp | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret64 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_real32 | 
|  | // | 
|  | // kmp_real32 | 
|  | // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); | 
|  | // | 
|  | // Atomically stores the 32-bit pattern of "data" to *addr and returns the | 
|  | // value that the exchange itself read from *addr. | 
|  | // | 
|  | // parameters: | 
|  | // 	addr:	4(%esp) at entry, 8(%ebp) once the frame is set up | 
|  | // 	data:	8(%esp) at entry, 12(%ebp) once the frame is set up | 
|  | // | 
|  | // return:	st(0), loaded from the local slot at -4(%ebp) | 
|  | // | 
|  | // %esi is saved and restored; %eax is used as scratch. | 
|  | PROC  __kmp_xchg_real32 | 
|  |  | 
|  | pushl   %ebp | 
|  | movl    %esp, %ebp | 
|  | subl    $4, %esp                // local slot -4(%ebp): old value | 
|  | pushl   %esi | 
|  |  | 
|  | // After the prologue 0(%ebp) is the saved %ebp and 4(%ebp) the return | 
|  | // address, so the first argument lives at 8(%ebp). | 
|  | movl    8(%ebp), %esi           // "addr" | 
|  | movl    12(%ebp), %eax          // "data" | 
|  |  | 
|  | lock | 
|  | xchgl   %eax, (%esi)            // %eax = previous *addr, *addr = data | 
|  |  | 
|  | // Return the value obtained by the exchange itself (not a separate, | 
|  | // earlier read), with exactly one x87 load so only st(0) is occupied. | 
|  | movl    %eax, -4(%ebp) | 
|  | flds    -4(%ebp) | 
|  | // return old value | 
|  |  | 
|  | popl    %esi | 
|  | movl    %ebp, %esp | 
|  | popl    %ebp | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_real32 | 
|  |  | 
|  | # endif /* !KMP_ASM_INTRINS */ | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // int | 
|  | // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), | 
|  | //                         int gtid, int tid, | 
|  | //                         int argc, void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                         , | 
|  | //                         void **exit_frame_ptr | 
|  | // #endif | 
|  | //                       ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)( & gtid, & tid, argv[0], ... ); | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // Frame layout once %ebp is established (as used by the code below): | 
|  | //	 8(%ebp)  pkfn | 
|  | //	12(%ebp)  gtid            (its address is passed to pkfn) | 
|  | //	16(%ebp)  tid             (its address is passed to pkfn) | 
|  | //	20(%ebp)  argc | 
|  | //	24(%ebp)  p_argv | 
|  | //	28(%ebp)  exit_frame_ptr  (read only when OMPT_SUPPORT) | 
|  | //	-4(%ebp)  local copy of p_argv | 
|  | //	-8(%ebp)  temp | 
|  | //	-12(%ebp) saved %ebx | 
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | PROC  __kmp_invoke_microtask | 
|  |  | 
|  | pushl %ebp | 
|  | KMP_CFI_DEF_OFFSET 8 | 
|  | KMP_CFI_OFFSET ebp,-8 | 
|  | movl %esp,%ebp		// establish the base pointer for this routine. | 
|  | KMP_CFI_REGISTER ebp | 
|  | subl $8,%esp		// allocate space for two local variables. | 
|  | // These variables are: | 
|  | //	argv: -4(%ebp) | 
|  | //	temp: -8(%ebp) | 
|  | // | 
|  | pushl %ebx		// save %ebx to use during this routine | 
|  | // | 
|  | #if OMPT_SUPPORT | 
|  | movl 28(%ebp),%ebx	// get exit_frame address | 
|  | movl %ebp,(%ebx)	// save exit_frame | 
|  | #endif | 
|  |  | 
|  | // Pre-adjust %esp so that, after the (argc+2) argument words are pushed | 
|  | // below, the stack pointer sits on a 128-byte boundary at the call. | 
|  | movl 20(%ebp),%ebx	// Stack alignment - # args | 
|  | addl $2,%ebx		// #args +2  Always pass at least 2 args (gtid and tid) | 
|  | shll $2,%ebx		// Number of bytes used on stack: (#args+2)*4 | 
|  | movl %esp,%eax		// | 
|  | subl %ebx,%eax		// %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this | 
|  | movl %eax,%ebx		// Save to %ebx | 
|  | andl $0xFFFFFF80,%eax	// mask off 7 bits | 
|  | subl %eax,%ebx		// Amount to subtract from %esp | 
|  | subl %ebx,%esp		// Prepare the stack ptr -- | 
|  | //   now it will be aligned on 128-byte boundary at the call | 
|  |  | 
|  | movl 24(%ebp),%eax	// copy from p_argv[] | 
|  | movl %eax,-4(%ebp)	// into the local variable *argv. | 
|  |  | 
|  | movl 20(%ebp),%ebx	// argc is 20(%ebp) | 
|  | shll $2,%ebx | 
|  |  | 
|  | // Loop: %ebx is the byte offset just past the next argument to push; the | 
|  | // arguments are pushed from argv[argc-1] down to argv[0]. | 
|  | KMP_LABEL(invoke_2): | 
|  | cmpl $0,%ebx | 
|  | jg  KMP_LABEL(invoke_4) | 
|  | jmp KMP_LABEL(invoke_3) | 
|  | ALIGN 2 | 
|  | KMP_LABEL(invoke_4): | 
|  | movl -4(%ebp),%eax | 
|  | subl $4,%ebx			// decrement argc. | 
|  | addl %ebx,%eax			// index into argv. | 
|  | movl (%eax),%edx | 
|  | pushl %edx | 
|  |  | 
|  | jmp KMP_LABEL(invoke_2) | 
|  | ALIGN 2 | 
|  | KMP_LABEL(invoke_3): | 
|  | leal 16(%ebp),%eax		// push & tid | 
|  | pushl %eax | 
|  |  | 
|  | leal 12(%ebp),%eax		// push & gtid | 
|  | pushl %eax | 
|  |  | 
|  | movl 8(%ebp),%ebx | 
|  | call *%ebx			// call (*pkfn)(); | 
|  |  | 
|  | movl $1,%eax			// return 1; | 
|  |  | 
|  | // %ebx is reloaded by frame offset rather than popped because %esp was | 
|  | // moved by the alignment adjustment and the argument pushes above. | 
|  | movl -12(%ebp),%ebx		// restore %ebx | 
|  | leave | 
|  | KMP_CFI_DEF esp,4 | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_invoke_microtask | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  |  | 
|  | // kmp_uint64 | 
|  | // __kmp_hardware_timestamp(void) | 
|  | // | 
|  | // Reads the time-stamp counter with rdtsc. | 
|  | // return:	%edx:%eax (high:low halves, as left by rdtsc) | 
|  | PROC  __kmp_hardware_timestamp | 
|  | rdtsc | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_hardware_timestamp | 
|  | // -- End  __kmp_hardware_timestamp | 
|  |  | 
|  | #endif /* KMP_ARCH_X86 */ | 
|  |  | 
|  |  | 
|  | #if KMP_ARCH_X86_64 | 
|  |  | 
|  | // ----------------------------------------------------------------------- | 
|  | // microtasking routines specifically written for IA-32 architecture and | 
|  | // Intel(R) 64 running Linux* OS | 
|  | // ----------------------------------------------------------------------- | 
|  |  | 
|  | // -- Machine type P | 
|  | // mark_description "Intel Corporation"; | 
|  | .ident "Intel Corporation" | 
|  | // --	.file "z_Linux_asm.S" | 
|  | // Switches to the data section and aligns it; the routines that follow | 
|  | // each switch back with their own .text directive. | 
|  | .data | 
|  | ALIGN 4 | 
|  |  | 
|  | // To keep our code out of the .data section, a .text directive is added | 
|  | // before every routine definition for x86_64. | 
|  | //------------------------------------------------------------------------ | 
|  | # if !KMP_ASM_INTRINS | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_test_then_add32 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); | 
|  | // | 
|  | // Atomic fetch-and-add: *p += d, returning the value *p held before the add. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%esi | 
|  | // | 
|  | // return:	%eax | 
|  | .text | 
|  | PROC  __kmp_test_then_add32 | 
|  |  | 
|  | movl      %esi, %eax	// "d" | 
|  | lock | 
|  | xaddl     %eax,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_test_then_add32 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_test_then_add64 | 
|  | // | 
|  | // kmp_int64 | 
|  | // __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); | 
|  | // | 
|  | // Atomic fetch-and-add: *p += d, returning the value *p held before the add. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%rsi | 
|  | //	return:	%rax | 
|  | .text | 
|  | PROC  __kmp_test_then_add64 | 
|  |  | 
|  | movq      %rsi, %rax	// "d" | 
|  | lock | 
|  | xaddq     %rax,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_test_then_add64 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed8 | 
|  | // | 
|  | // kmp_int8 | 
|  | // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the byte *p held before. | 
|  | // Only %al is written; the upper bytes of %rax are not set by this routine. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%sil | 
|  | // | 
|  | // return:	%al | 
|  | .text | 
|  | PROC  __kmp_xchg_fixed8 | 
|  |  | 
|  | movb      %sil, %al	// "d" | 
|  |  | 
|  | lock | 
|  | xchgb     %al,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed8 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed16 | 
|  | // | 
|  | // kmp_int16 | 
|  | // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the 16-bit value *p held before. | 
|  | // Only %ax is written; the upper bits of %rax are not set by this routine. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%si | 
|  | // return:     %ax | 
|  | .text | 
|  | PROC  __kmp_xchg_fixed16 | 
|  |  | 
|  | movw      %si, %ax	// "d" | 
|  |  | 
|  | lock | 
|  | xchgw     %ax,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed16 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed32 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the 32-bit value *p held before. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%esi | 
|  | // | 
|  | // return:	%eax | 
|  | .text | 
|  | PROC  __kmp_xchg_fixed32 | 
|  |  | 
|  | movl      %esi, %eax	// "d" | 
|  |  | 
|  | lock | 
|  | xchgl     %eax,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed32 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_fixed64 | 
|  | // | 
|  | // kmp_int64 | 
|  | // __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); | 
|  | // | 
|  | // Atomically stores d to *p and returns the 64-bit value *p held before. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	d:	%rsi | 
|  | // return:	%rax | 
|  | .text | 
|  | PROC  __kmp_xchg_fixed64 | 
|  |  | 
|  | movq      %rsi, %rax	// "d" | 
|  |  | 
|  | lock | 
|  | xchgq     %rax,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_fixed64 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store8 | 
|  | // | 
|  | // kmp_int8 | 
|  | // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); | 
|  | // | 
|  | // Atomic byte compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%sil | 
|  | //	sv:	%dl | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | .text | 
|  | PROC  __kmp_compare_and_store8 | 
|  |  | 
|  | movb      %sil, %al         // %al = "cv" (implicit comparand of cmpxchg) | 
|  | lock | 
|  | cmpxchgb  %dl,(%rdi)        // ZF = 1 iff *p matched %al and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result (clears upper %rax too) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store8 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store16 | 
|  | // | 
|  | // kmp_int16 | 
|  | // __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); | 
|  | // | 
|  | // Atomic 16-bit compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%si | 
|  | //	sv:	%dx | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | .text | 
|  | PROC  __kmp_compare_and_store16 | 
|  |  | 
|  | movw      %si, %ax          // %ax = "cv" (implicit comparand of cmpxchg) | 
|  | lock | 
|  | cmpxchgw  %dx,(%rdi)        // ZF = 1 iff *p matched %ax and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result (clears upper %rax too) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store16 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store32 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); | 
|  | // | 
|  | // Atomic 32-bit compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%esi | 
|  | //	sv:	%edx | 
|  | // | 
|  | // return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | .text | 
|  | PROC  __kmp_compare_and_store32 | 
|  |  | 
|  | movl      %esi, %eax        // %eax = "cv" (implicit comparand of cmpxchg) | 
|  | lock | 
|  | cmpxchgl  %edx,(%rdi)       // ZF = 1 iff *p matched %eax and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result (clears upper %rax too) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store32 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store64 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); | 
|  | // | 
|  | // Atomic 64-bit compare-and-swap: if *p == cv then *p = sv. | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%rsi | 
|  | //	sv:	%rdx | 
|  | //	return:	%eax = 1 if the store was performed, 0 otherwise | 
|  | .text | 
|  | PROC  __kmp_compare_and_store64 | 
|  |  | 
|  | movq      %rsi, %rax        // %rax = "cv" (implicit comparand of cmpxchg) | 
|  | lock | 
|  | cmpxchgq  %rdx,(%rdi)       // ZF = 1 iff *p matched %rax and was replaced | 
|  | sete      %al               // %al = ZF | 
|  | movzbl    %al, %eax         // zero-extend the 0/1 result (clears upper %rax too) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store64 | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store_ret8 | 
|  | // | 
|  | // kmp_int8 | 
|  | // __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); | 
|  | // | 
|  | // Atomic byte compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%sil | 
|  | //	sv:	%dl | 
|  | // | 
|  | // return:	%al (left there by cmpxchgb; upper bytes of %rax are not set) | 
|  | .text | 
|  | PROC  __kmp_compare_and_store_ret8 | 
|  |  | 
|  | movb      %sil, %al	// "cv" | 
|  | lock | 
|  | cmpxchgb  %dl,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret8 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store_ret16 | 
|  | // | 
|  | // kmp_int16 | 
|  | // __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); | 
|  | // | 
|  | // Atomic 16-bit compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%si | 
|  | //	sv:	%dx | 
|  | // | 
|  | // return:	%ax (left there by cmpxchgw; upper bits of %rax are not set) | 
|  | .text | 
|  | PROC  __kmp_compare_and_store_ret16 | 
|  |  | 
|  | movw      %si, %ax	// "cv" | 
|  | lock | 
|  | cmpxchgw  %dx,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret16 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store_ret32 | 
|  | // | 
|  | // kmp_int32 | 
|  | // __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); | 
|  | // | 
|  | // Atomic 32-bit compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%esi | 
|  | //	sv:	%edx | 
|  | // | 
|  | // return:	%eax | 
|  | .text | 
|  | PROC  __kmp_compare_and_store_ret32 | 
|  |  | 
|  | movl      %esi, %eax	// "cv" | 
|  | lock | 
|  | cmpxchgl  %edx,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret32 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_compare_and_store_ret64 | 
|  | // | 
|  | // kmp_int64 | 
|  | // __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); | 
|  | // | 
|  | // Atomic 64-bit compare-and-swap that returns the value observed in *p | 
|  | // (equal to cv when the store was performed). | 
|  | // | 
|  | // parameters: | 
|  | // 	p:	%rdi | 
|  | // 	cv:	%rsi | 
|  | //	sv:	%rdx | 
|  | //	return:	%rax | 
|  | .text | 
|  | PROC  __kmp_compare_and_store_ret64 | 
|  |  | 
|  | movq      %rsi, %rax    // "cv" | 
|  | lock | 
|  | cmpxchgq  %rdx,(%rdi) | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_compare_and_store_ret64 | 
|  |  | 
|  | # endif /* !KMP_ASM_INTRINS */ | 
|  |  | 
|  |  | 
|  | # if !KMP_MIC | 
|  |  | 
|  | # if !KMP_ASM_INTRINS | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_real32 | 
|  | // | 
|  | // kmp_real32 | 
|  | // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); | 
|  | // | 
|  | // parameters: | 
|  | // 	addr:	%rdi | 
|  | // 	data:	%xmm0 (lower 4 bytes) | 
|  | // | 
|  | // return:	%xmm0 (lower 4 bytes) | 
|  | .text | 
// Atomically replaces the 4-byte value at *addr with the bit pattern of
// "data" and returns the previous contents in %xmm0. The value is routed
// through %eax because xchg only accepts general-purpose register operands.
|  | PROC  __kmp_xchg_real32 | 
|  |  | 
|  | movd	%xmm0, %eax	// copy the raw bits of "data" into %eax (no conversion) | 
|  |  | 
// After the exchange %eax holds what was stored at (%rdi) beforehand.
|  | lock | 
|  | xchgl %eax, (%rdi) | 
|  |  | 
|  | movd	%eax, %xmm0	// old value back into the floating-point return register | 
|  |  | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_real32 | 
|  |  | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_xchg_real64 | 
|  | // | 
|  | // kmp_real64 | 
|  | // __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); | 
|  | // | 
|  | // parameters: | 
|  | //      addr:   %rdi | 
|  | //      data:   %xmm0 (lower 8 bytes) | 
|  | //      return: %xmm0 (lower 8 bytes) | 
|  | .text | 
// Atomically replaces the 8-byte value at *addr with the bit pattern of
// "data" and returns the previous contents in %xmm0. The value is routed
// through %rax because xchg only accepts general-purpose register operands.
|  | PROC  __kmp_xchg_real64 | 
|  |  | 
|  | movd	%xmm0, %rax	// copy the raw bits of "data" to rax (64-bit register form) | 
|  |  | 
// After the exchange %rax holds what was stored at (%rdi) beforehand.
|  | lock | 
|  | xchgq  %rax, (%rdi) | 
|  |  | 
|  | movd	%rax, %xmm0	// old value back into the floating-point return register | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_xchg_real64 | 
|  |  | 
|  |  | 
|  | # endif /* !KMP_ASM_INTRINS */ | 
|  |  | 
|  | # endif /* !KMP_MIC */ | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // int | 
|  | // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), | 
|  | //                         int gtid, int tid, | 
|  | //                         int argc, void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                         , | 
|  | //                         void **exit_frame_ptr | 
|  | // #endif | 
|  | //                       ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)( & gtid, & tid, argv[0], ... ); | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // note: at call to pkfn must have %rsp 128-byte aligned for compiler | 
|  | // | 
|  | // parameters: | 
|  | //      %rdi:  	pkfn | 
|  | //	%esi:	gtid | 
|  | //	%edx:	tid | 
|  | //	%ecx:	argc | 
|  | //	%r8:	p_argv | 
|  | //	%r9:	&exit_frame | 
|  | // | 
|  | // locals: | 
|  | //	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn | 
|  | //	__tid:	tid parm pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // reg temps: | 
|  | //	%rax:	used all over the place | 
|  | //	%rdx:	used in stack pointer alignment calculation | 
|  | //	%r11:	used to traverse p_argv array | 
|  | //	%rsi:	used as temporary for stack parameters | 
|  | //		used as temporary for number of pkfn parms to push | 
|  | //	%rbx:	used to hold pkfn address, and zero constant, callee-save | 
|  | // | 
|  | // return:	%eax 	(always 1/TRUE) | 
|  | __gtid = -16 | 
|  | __tid = -24 | 
// Frame layout relative to %rbp once the three pushes below have run:
//   -8(%rbp)   saved %rbx
//   -16(%rbp)  gtid (__gtid)
//   -24(%rbp)  tid  (__tid)
// Below that sits alignment padding followed by the stack-passed pkfn
// arguments p_argv[4..argc-1], arranged so that %rsp is 128-byte aligned at
// the call instruction.
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | .text | 
|  | PROC  __kmp_invoke_microtask | 
|  |  | 
|  | pushq 	%rbp		// save base pointer | 
|  | KMP_CFI_DEF_OFFSET 16 | 
|  | KMP_CFI_OFFSET rbp,-16 | 
|  | movq 	%rsp,%rbp	// establish the base pointer for this routine. | 
|  | KMP_CFI_REGISTER rbp | 
|  |  | 
|  | #if OMPT_SUPPORT | 
|  | movq	%rbp, (%r9)	// save exit_frame | 
|  | #endif | 
|  |  | 
|  | pushq 	%rbx		// %rbx is callee-saved register | 
|  | pushq	%rsi		// Put gtid on stack so can pass &gtid to pkfn | 
|  | pushq	%rdx		// Put tid on stack so can pass &tid to pkfn | 
|  |  | 
// argc is a C int, so only %ecx is defined on entry. Widen it once here so
// that every 64-bit use of argc below (size computation, compares, cmov)
// works on a well-defined value instead of stale upper register bits.
|  | movslq	%ecx, %rcx	// sign-extend argc (int) to 64 bits | 
|  | movq	%rcx, %rax	// Stack alignment calculation begins; argc -> %rax | 
|  | movq	$0, %rbx	// constant for cmovs later | 
|  | subq	$4, %rax	// subtract four args passed in registers to pkfn | 
|  | #if KMP_MIC | 
|  | js	KMP_LABEL(kmp_0)	// jump to movq | 
|  | jmp	KMP_LABEL(kmp_0_exit)	// jump ahead | 
|  | KMP_LABEL(kmp_0): | 
|  | movq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4) | 
|  | KMP_LABEL(kmp_0_exit): | 
|  | #else | 
|  | cmovsq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4) | 
|  | #endif // KMP_MIC | 
|  |  | 
|  | movq	%rax, %rsi	// save max(0, argc-4) -> %rsi for later | 
|  | shlq 	$3, %rax	// Number of bytes used on stack: max(0, argc-4)*8 | 
|  |  | 
|  | movq 	%rsp, %rdx	// | 
|  | subq 	%rax, %rdx	// %rsp-(max(0,argc-4)*8) -> %rdx -- | 
|  | // without align, stack ptr would be this | 
|  | movq 	%rdx, %rax	// Save to %rax | 
|  |  | 
|  | andq 	$0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align) | 
|  | subq 	%rax, %rdx	// Amount to subtract from %rsp | 
|  | subq 	%rdx, %rsp	// Prepare the stack ptr -- | 
|  | // now %rsp will align to 128-byte boundary at call site | 
|  |  | 
|  | // setup pkfn parameter reg and stack | 
|  | movq	%rcx, %rax	// argc -> %rax | 
|  | cmpq	$0, %rsi | 
|  | je	KMP_LABEL(kmp_invoke_pass_parms)	// jump ahead if no parms to push | 
|  | shlq	$3, %rcx	// argc*8 -> %rcx | 
|  | movq 	%r8, %rdx	// p_argv -> %rdx | 
|  | addq	%rcx, %rdx	// &p_argv[argc] -> %rdx | 
|  |  | 
|  | movq	%rsi, %rcx	// max (0, argc-4) -> %rcx | 
|  |  | 
|  | KMP_LABEL(kmp_invoke_push_parms): | 
|  | // push nth - 7th parms to pkfn on stack | 
|  | subq	$8, %rdx	// decrement p_argv pointer to previous parm | 
|  | movq	(%rdx), %rsi	// p_argv[%rcx-1] -> %rsi | 
|  | pushq	%rsi		// push p_argv[%rcx-1] onto stack (reverse order) | 
|  | subl	$1, %ecx | 
|  |  | 
|  | // C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e | 
|  | //		if the name of the label that is an operand of this jecxz starts with a dot ("."); | 
|  | //	   Apple's linker does not support 1-byte length relocation; | 
|  | //         Resolution: replace all .labelX entries with L_labelX. | 
|  |  | 
|  | jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left | 
|  | jmp	KMP_LABEL(kmp_invoke_push_parms) | 
|  | ALIGN 3 | 
|  | KMP_LABEL(kmp_invoke_pass_parms):	// put 1st - 6th parms to pkfn in registers. | 
|  | // order here is important to avoid trashing | 
|  | // registers used for both input and output parms! | 
|  | movq	%rdi, %rbx	// pkfn -> %rbx | 
|  | leaq	__gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn) | 
|  | leaq	__tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn) | 
|  | // Check if argc is 0 | 
|  | cmpq $0, %rax | 
|  | je KMP_LABEL(kmp_no_args) // Jump ahead | 
|  |  | 
|  | movq	%r8, %r11	// p_argv -> %r11 | 
|  |  | 
|  | #if KMP_MIC | 
|  | cmpq	$4, %rax	// argc >= 4? | 
|  | jns	KMP_LABEL(kmp_4)	// jump to movq | 
|  | jmp	KMP_LABEL(kmp_4_exit)	// jump ahead | 
|  | KMP_LABEL(kmp_4): | 
|  | movq	24(%r11), %r9	// p_argv[3] -> %r9 (store 6th parm to pkfn) | 
|  | KMP_LABEL(kmp_4_exit): | 
|  |  | 
|  | cmpq	$3, %rax	// argc >= 3? | 
|  | jns	KMP_LABEL(kmp_3)	// jump to movq | 
|  | jmp	KMP_LABEL(kmp_3_exit)	// jump ahead | 
|  | KMP_LABEL(kmp_3): | 
|  | movq	16(%r11), %r8	// p_argv[2] -> %r8 (store 5th parm to pkfn) | 
|  | KMP_LABEL(kmp_3_exit): | 
|  |  | 
|  | cmpq	$2, %rax	// argc >= 2? | 
|  | jns	KMP_LABEL(kmp_2)	// jump to movq | 
|  | jmp	KMP_LABEL(kmp_2_exit)	// jump ahead | 
|  | KMP_LABEL(kmp_2): | 
|  | movq	8(%r11), %rcx	// p_argv[1] -> %rcx (store 4th parm to pkfn) | 
|  | KMP_LABEL(kmp_2_exit): | 
|  |  | 
|  | cmpq	$1, %rax	// argc >= 1? | 
|  | jns	KMP_LABEL(kmp_1)	// jump to movq | 
|  | jmp	KMP_LABEL(kmp_1_exit)	// jump ahead | 
|  | KMP_LABEL(kmp_1): | 
|  | movq	(%r11), %rdx	// p_argv[0] -> %rdx (store 3rd parm to pkfn) | 
|  | KMP_LABEL(kmp_1_exit): | 
|  | #else | 
|  | cmpq	$4, %rax	// argc >= 4? | 
|  | cmovnsq	24(%r11), %r9	// p_argv[3] -> %r9 (store 6th parm to pkfn) | 
|  |  | 
|  | cmpq	$3, %rax	// argc >= 3? | 
|  | cmovnsq	16(%r11), %r8	// p_argv[2] -> %r8 (store 5th parm to pkfn) | 
|  |  | 
|  | cmpq	$2, %rax	// argc >= 2? | 
|  | cmovnsq	8(%r11), %rcx	// p_argv[1] -> %rcx (store 4th parm to pkfn) | 
|  |  | 
|  | cmpq	$1, %rax	// argc >= 1? | 
|  | cmovnsq	(%r11), %rdx	// p_argv[0] -> %rdx (store 3rd parm to pkfn) | 
|  | #endif // KMP_MIC | 
|  |  | 
|  | KMP_LABEL(kmp_no_args): | 
|  | call	*%rbx		// call (*pkfn)(); | 
|  | movq	$1, %rax	// move 1 into return register; | 
|  |  | 
|  | movq	-8(%rbp), %rbx	// restore %rbx	using %rbp since %rsp was modified | 
|  | movq 	%rbp, %rsp	// restore stack pointer | 
|  | popq 	%rbp		// restore frame pointer | 
|  | KMP_CFI_DEF rsp,8 | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_invoke_microtask | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | // kmp_uint64 | 
|  | // __kmp_hardware_timestamp(void) | 
|  | .text | 
// Reads the processor time-stamp counter and returns it as a single 64-bit
// value in %rax. Takes no arguments.
|  | PROC  __kmp_hardware_timestamp | 
// rdtsc delivers the counter split across %edx (high half) and %eax (low half).
|  | rdtsc | 
// Shift the high half into bits 63..32 and merge it with the low half.
|  | shlq    $32, %rdx | 
|  | orq     %rdx, %rax | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_hardware_timestamp | 
|  | // -- End  __kmp_hardware_timestamp | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // FUNCTION __kmp_bsr32 | 
|  | // | 
|  | // int | 
|  | // __kmp_bsr32( int ); | 
|  | .text | 
// Returns in %eax the bit index of the most significant set bit of the
// 32-bit argument passed in %edi.
// NOTE(review): bsr leaves its destination undefined when the source is 0;
// presumably callers never pass 0 -- confirm against the call sites.
|  | PROC  __kmp_bsr32 | 
|  |  | 
|  | bsr    %edi,%eax | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_bsr32 | 
|  |  | 
|  | // ----------------------------------------------------------------------- | 
|  | #endif /* KMP_ARCH_X86_64 */ | 
|  |  | 
|  | // ' | 
|  | #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // int | 
|  | // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), | 
|  | //                         int gtid, int tid, | 
|  | //                         int argc, void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                         , | 
|  | //                         void **exit_frame_ptr | 
|  | // #endif | 
|  | //                       ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)( & gtid, & tid, argv[0], ... ); | 
|  | // | 
|  | // // FIXME: This is done at call-site and can be removed here. | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = 0; | 
|  | // #endif | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // parameters: | 
|  | //	x0:	pkfn | 
|  | //	w1:	gtid | 
|  | //	w2:	tid | 
|  | //	w3:	argc | 
|  | //	x4:	p_argv | 
|  | //	x5:	&exit_frame | 
|  | // | 
|  | // locals: | 
|  | //	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn | 
|  | //	__tid:	tid parm pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // reg temps: | 
|  | //	 x8:	used to hold pkfn address | 
|  | //	 w9:	used as temporary for number of pkfn parms | 
|  | //	x10:	used to traverse p_argv array | 
|  | //	x11:	used as temporary for stack placement calculation | 
|  | //	x12:	used as temporary for stack parameters | 
|  | //	x19:	used to preserve exit_frame_ptr, callee-save | 
|  | // | 
|  | // return:	w0	(always 1/TRUE) | 
|  | // | 
|  |  | 
|  | __gtid = 4 | 
|  | __tid = 8 | 
// The two symbols above are byte distances below x29: gtid is spilled at
// [x29, #-__gtid] and tid at [x29, #-__tid], inside the area reserved by the
// "sub sp" further down.
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | .text | 
|  | PROC __kmp_invoke_microtask | 
// PACBTI_C and PACBTI_RET are macros defined elsewhere in this file;
// presumably the pointer-authentication / BTI entry and return sequences.
|  | PACBTI_C | 
|  |  | 
// Save the frame pointer and link register; with OMPT also save x19, which
// is used to keep exit_frame_ptr alive across the call (x20 is stored in the
// same pair).
|  | stp	x29, x30, [sp, #-16]! | 
|  | # if OMPT_SUPPORT | 
|  | stp	x19, x20, [sp, #-16]! | 
|  | # endif | 
|  | mov	x29, sp | 
|  |  | 
// Reserve (1 + argc/2) * 16 bytes below x29: w9 = 1 + (argc >> 1), then
// sp -= w9 << 4. x11 is the write cursor for stack-passed pkfn arguments.
|  | orr	w9, wzr, #1 | 
|  | add	w9, w9, w3, lsr #1 | 
|  | sub	sp, sp, w9, uxtw #4 | 
|  | mov	x11, sp | 
|  |  | 
// Move the incoming arguments out of the registers that are about to be
// reused for the outgoing call: x8 = pkfn, w9 = argc, x10 = p_argv.
|  | mov	x8, x0 | 
|  | str	w1, [x29, #-__gtid] | 
|  | str	w2, [x29, #-__tid] | 
|  | mov	w9, w3 | 
|  | mov	x10, x4 | 
|  | # if OMPT_SUPPORT | 
// *exit_frame_ptr = x29, with the pointer itself kept in x19.
|  | mov	x19, x5 | 
|  | str	x29, [x19] | 
|  | # endif | 
|  |  | 
// First two pkfn arguments: &gtid and &tid.
|  | sub	x0, x29, #__gtid | 
|  | sub	x1, x29, #__tid | 
|  |  | 
// Load p_argv[0..5] into x2..x7, leaving for the call as soon as the
// remaining-argument count in w9 reaches zero. The pre-indexed loads advance
// x10 through p_argv by 8 bytes each.
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x2, [x10] | 
|  |  | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x3, [x10, #8]! | 
|  |  | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x4, [x10, #8]! | 
|  |  | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x5, [x10, #8]! | 
|  |  | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x6, [x10, #8]! | 
|  |  | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x7, [x10, #8]! | 
|  |  | 
// Remaining arguments (p_argv[6] onward) are copied to the stack area in
// ascending order, starting at sp.
|  | KMP_LABEL(kmp_0): | 
|  | sub	w9, w9, #1 | 
|  | cbz	w9, KMP_LABEL(kmp_1) | 
|  | ldr	x12, [x10, #8]! | 
|  | str	x12, [x11], #8 | 
|  | b	KMP_LABEL(kmp_0) | 
// Call pkfn, set the return value to 1 and release the dynamic stack area by
// resetting sp to x29.
|  | KMP_LABEL(kmp_1): | 
|  | blr	x8 | 
|  | orr	w0, wzr, #1 | 
|  | mov	sp, x29 | 
|  | # if OMPT_SUPPORT | 
// *exit_frame_ptr = 0, then restore the saved x19/x20 pair.
|  | str	xzr, [x19] | 
|  | ldp	x19, x20, [sp], #16 | 
|  | # endif | 
|  | ldp	x29, x30, [sp], #16 | 
|  | PACBTI_RET | 
|  | ret | 
|  |  | 
|  | DEBUG_INFO __kmp_invoke_microtask | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */ | 
|  |  | 
|  | #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // int | 
|  | // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), | 
|  | //                         int gtid, int tid, | 
|  | //                         int argc, void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                         , | 
|  | //                         void **exit_frame_ptr | 
|  | // #endif | 
|  | //                       ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)( & gtid, & tid, argv[0], ... ); | 
|  | // | 
|  | // // FIXME: This is done at call-site and can be removed here. | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = 0; | 
|  | // #endif | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // parameters: | 
|  | //	r0:	pkfn | 
|  | //	r1:	gtid | 
|  | //	r2:	tid | 
|  | //	r3:	argc | 
|  | //	r4(stack):	p_argv | 
|  | //	r5(stack):	&exit_frame | 
|  | // | 
|  | // locals: | 
|  | //	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn | 
|  | //	__tid:	tid parm pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // reg temps: | 
|  | //	 r4:	used to hold pkfn address | 
|  | //	 r5:	used as temporary for number of pkfn parms | 
|  | //	 r6:	used to traverse p_argv array | 
|  | //	 r7:	frame pointer (in some configurations) | 
|  | //	 r8:	used as temporary for stack placement calculation | 
|  | //	 	and as pointer to base of callee saved area | 
|  | //	 r9:	used as temporary for stack parameters | 
|  | //	r10:	used to preserve exit_frame_ptr, callee-save | 
|  | //	r11:	frame pointer (in some configurations) | 
|  | // | 
|  | // return:	r0	(always 1/TRUE) | 
|  | // | 
|  |  | 
|  | __gtid = 4 | 
|  | __tid = 8 | 
// The two symbols above are byte distances below the base of the register
// save area (held in r8 at that point): gtid goes to [r8, #-__gtid] and tid
// to [r8, #-__tid].
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | .text | 
|  | PROC __kmp_invoke_microtask | 
|  |  | 
|  | // Pushing one extra register (r3) to keep the stack aligned | 
|  | // for when we call pkfn below | 
|  | push	{r3-r11,lr} | 
|  | // Load p_argv and &exit_frame. Ten registers (40 bytes) were just pushed, | 
|  | // so the stack-passed arguments of this routine start at [sp, #10*4]. | 
|  | ldr	r4, [sp, #10*4] | 
|  | # if OMPT_SUPPORT | 
|  | ldr	r5, [sp, #11*4] | 
|  | # endif | 
|  |  | 
// FP is pointed at the slot holding its own saved value: r7 is the fifth and
// r11 the ninth register of the push list above, hence offsets 4*4 and 8*4.
|  | # if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS) | 
|  | # define FP r7 | 
|  | # define FPOFF 4*4 | 
|  | #else | 
|  | # define FP r11 | 
|  | # define FPOFF 8*4 | 
|  | #endif | 
|  | add	FP, sp, #FPOFF | 
|  | # if OMPT_SUPPORT | 
// *exit_frame_ptr = FP, with the pointer itself kept in r10.
|  | mov	r10, r5 | 
|  | str	FP, [r10] | 
|  | # endif | 
|  | mov	r8, sp | 
|  |  | 
|  | // Calculate how much stack to allocate, in increments of 8 bytes. | 
|  | // We strictly need 4*(argc-2) bytes (2 arguments are passed in | 
|  | // registers) but allocate 4*argc for simplicity (to avoid needing | 
|  | // to handle the argc<2 cases). We align the number of bytes | 
|  | // allocated to 8 bytes, to keep the stack aligned. (Since we | 
|  | // already allocate more than enough, it's ok to round down | 
|  | // instead of up for the alignment.) We allocate another extra | 
|  | // 8 bytes for gtid and tid. | 
|  | mov	r5, #1 | 
|  | add	r5, r5, r3, lsr #1 | 
|  | sub	sp, sp, r5, lsl #3 | 
|  |  | 
// Spill gtid and tid, then move the incoming arguments out of the registers
// reused for the outgoing call: r5 = argc, r6 = p_argv, r4 = pkfn.
|  | str	r1, [r8, #-__gtid] | 
|  | str	r2, [r8, #-__tid] | 
|  | mov	r5, r3 | 
|  | mov	r6, r4 | 
|  | mov	r4, r0 | 
|  |  | 
|  | // Prepare the first 2 parameters to pkfn - pointers to gtid and tid | 
|  | // in our stack frame. | 
|  | sub	r0, r8, #__gtid | 
|  | sub	r1, r8, #__tid | 
|  |  | 
// From here on r8 is the write cursor for stack-passed pkfn arguments.
|  | mov	r8, sp | 
|  |  | 
|  | // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2 | 
|  | cmp	r5, #0 | 
|  | beq	KMP_LABEL(kmp_1) | 
|  | ldr	r2, [r6] | 
|  |  | 
|  | subs	r5, r5, #1 | 
|  | beq	KMP_LABEL(kmp_1) | 
|  | ldr	r3, [r6, #4]! | 
|  |  | 
|  | // Loop, loading the rest of p_argv and writing the elements on the | 
|  | // stack. | 
|  | KMP_LABEL(kmp_0): | 
|  | subs	r5, r5, #1 | 
|  | beq	KMP_LABEL(kmp_1) | 
|  | ldr	r12, [r6, #4]! | 
|  | str	r12, [r8], #4 | 
|  | b	KMP_LABEL(kmp_0) | 
|  | KMP_LABEL(kmp_1): | 
|  | blx	r4 | 
|  | mov	r0, #1 | 
|  |  | 
// Recover the base of the register save area from FP and drop the dynamic
// stack area by resetting sp to it.
|  | sub	r4, FP, #FPOFF | 
|  | mov	sp, r4 | 
|  | # undef FP | 
|  | # undef FPOFF | 
|  |  | 
|  | # if OMPT_SUPPORT | 
// *exit_frame_ptr = 0
|  | mov	r1, #0 | 
|  | str	r1, [r10] | 
|  | # endif | 
|  | pop	{r3-r11,pc} | 
|  |  | 
|  | DEBUG_INFO __kmp_invoke_microtask | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */ | 
|  |  | 
|  | #if KMP_ARCH_PPC64 | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // int | 
|  | // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), | 
|  | //                         int gtid, int tid, | 
|  | //                         int argc, void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                         , | 
|  | //                         void **exit_frame_ptr | 
|  | // #endif | 
|  | //                       ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)( & gtid, & tid, argv[0], ... ); | 
|  | // | 
|  | // // FIXME: This is done at call-site and can be removed here. | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = 0; | 
|  | // #endif | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // parameters: | 
|  | //	r3:	pkfn | 
|  | //	r4:	gtid | 
|  | //	r5:	tid | 
|  | //	r6:	argc | 
|  | //	r7:	p_argv | 
|  | //	r8:	&exit_frame | 
|  | // | 
|  | // return:	r3	(always 1/TRUE) | 
|  | // | 
|  | .text | 
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | .abiversion 2 | 
|  | # endif | 
|  | .globl	__kmp_invoke_microtask | 
|  |  | 
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | .p2align	4 | 
|  | # else | 
|  | .p2align	2 | 
|  | # endif | 
|  |  | 
|  | .type	__kmp_invoke_microtask,@function | 
|  |  | 
// ELFv2: the symbol labels the code itself, with a global entry point that
// derives the TOC pointer (r2) from r12 followed by the local entry point.
// Otherwise the symbol labels a three-doubleword descriptor in .opd (entry
// address, TOC base, zero) and the code starts at .Lfunc_begin0.
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | __kmp_invoke_microtask: | 
|  | .Lfunc_begin0: | 
|  | .Lfunc_gep0: | 
|  | addis 2, 12, .TOC.-.Lfunc_gep0@ha | 
|  | addi 2, 2, .TOC.-.Lfunc_gep0@l | 
|  | .Lfunc_lep0: | 
|  | .localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 | 
|  | # else | 
|  | .section	.opd,"aw",@progbits | 
|  | __kmp_invoke_microtask: | 
|  | .p2align	3 | 
|  | .quad	.Lfunc_begin0 | 
|  | .quad	.TOC.@tocbase | 
|  | .quad	0 | 
|  | .text | 
|  | .Lfunc_begin0: | 
|  | # endif | 
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  |  | 
|  | // We need to allocate a stack frame large enough to hold all of the parameters | 
|  | // on the stack for the microtask plus what this function needs. That's 48 | 
|  | // bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the | 
|  | // parameters to the microtask, plus 8 bytes to store the values of r4 and r5, | 
|  | // and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes | 
|  | // to save r30 to hold a copy of r8. | 
|  |  | 
|  | .cfi_startproc | 
// Save r31 just below the incoming stack pointer and the link register at
// 16(r1), before r1 is moved.
|  | mflr 0 | 
|  | std 31, -8(1) | 
|  | std 0, 16(1) | 
|  |  | 
|  | // This is unusual because normally we'd set r31 equal to r1 after the stack | 
|  | // frame is established. In this case, however, we need to dynamically compute | 
|  | // the stack frame size, and so we keep a direct copy of r1 to access our | 
|  | // register save areas and restore the r1 value before returning. | 
|  | mr 31, 1 | 
|  | .cfi_def_cfa_register r31 | 
|  | .cfi_offset r31, -8 | 
|  | .cfi_offset lr, 16 | 
|  |  | 
|  | // Compute the size necessary for the local stack frame. | 
// r12 = -(fixed part + 8 * argc); the fixed part differs between the ABIs.
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | li 12, 72 | 
|  | # else | 
|  | li 12, 88 | 
|  | # endif | 
|  | sldi 0, 6, 3 | 
|  | add 12, 0, 12 | 
|  | neg 12, 12 | 
|  |  | 
|  | // We need to make sure that the stack frame stays aligned (to 16 bytes). | 
// Masking the negative size with -16 can only make the frame larger.
|  | li 0, -16 | 
|  | and 12, 0, 12 | 
|  |  | 
|  | // Establish the local stack frame. | 
// stdux stores the old r1 at the new frame base and updates r1 in one step.
|  | stdux 1, 1, 12 | 
|  |  | 
|  | # if OMPT_SUPPORT | 
// Save r30, write the new r1 to *exit_frame_ptr and keep the pointer in r30
// so it is still available after the call.
|  | .cfi_offset r30, -16 | 
|  | std 30, -16(31) | 
|  | std 1, 0(8) | 
|  | mr 30, 8 | 
|  | # endif | 
|  |  | 
|  | // Store gtid and tid to the stack because they're passed by reference to the microtask. | 
|  | stw 4, -20(31) | 
|  | stw 5, -24(31) | 
|  |  | 
// r12 = argc, r4 = p_argv. The compare/branch ladder below loads
// p_argv[0..5] into r5..r10 and leaves for .Lcall as soon as argc is
// exhausted.
|  | mr 12, 6 | 
|  | mr 4, 7 | 
|  |  | 
|  | cmpwi 0, 12, 1 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 5, 0(4) | 
|  |  | 
|  | cmpwi 0, 12, 2 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 6, 8(4) | 
|  |  | 
|  | cmpwi 0, 12, 3 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 7, 16(4) | 
|  |  | 
|  | cmpwi 0, 12, 4 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 8, 24(4) | 
|  |  | 
|  | cmpwi 0, 12, 5 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 9, 32(4) | 
|  |  | 
|  | cmpwi 0, 12, 6 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | ld 10, 40(4) | 
|  |  | 
|  | cmpwi 0, 12, 7 | 
|  | blt	 0, .Lcall | 
|  |  | 
|  | // There are more than 6 microtask parameters, so we need to store the | 
|  | // remainder to the stack. | 
// CTR = argc - 6 = number of values copied by the loop at .Lnext.
|  | addi 12, 12, -6 | 
|  | mtctr 12 | 
|  |  | 
|  | // These are set to 8 bytes before the first desired store address (we're using | 
|  | // pre-increment loads and stores in the loop below). The parameter save area | 
|  | // for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and | 
|  | // 32 + 8*8 == 96 bytes above r1 for ELFv2. | 
|  | addi 4, 4, 40 | 
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | addi 12, 1, 88 | 
|  | # else | 
|  | addi 12, 1, 104 | 
|  | # endif | 
|  |  | 
|  | .Lnext: | 
|  | ldu 0, 8(4) | 
|  | stdu 0, 8(12) | 
|  | bdnz .Lnext | 
|  |  | 
|  | .Lcall: | 
// Save the TOC pointer (r2) in the frame before the indirect call.
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | std 2, 24(1) | 
|  | mr 12, 3 | 
|  | #else | 
|  | std 2, 40(1) | 
|  | // For ELFv1, we need to load the actual function address from the function descriptor. | 
|  | ld 12, 0(3) | 
|  | ld 2, 8(3) | 
|  | ld 11, 16(3) | 
|  | #endif | 
|  |  | 
// First two microtask arguments: the addresses of the gtid and tid copies
// stored at r31-20 and r31-24 above.
|  | addi 3, 31, -20 | 
|  | addi 4, 31, -24 | 
|  |  | 
|  | mtctr 12 | 
|  | bctrl | 
// Reload the TOC pointer saved before the call.
|  | # if KMP_ARCH_PPC64_ELFv2 | 
|  | ld 2, 24(1) | 
|  | # else | 
|  | ld 2, 40(1) | 
|  | # endif | 
|  |  | 
|  | # if OMPT_SUPPORT | 
// *exit_frame_ptr = 0 (the pointer was kept in r30)
|  | li 3, 0 | 
|  | std 3, 0(30) | 
|  | # endif | 
|  |  | 
// Return value is always 1.
|  | li 3, 1 | 
|  |  | 
|  | # if OMPT_SUPPORT | 
|  | ld 30, -16(31) | 
|  | # endif | 
|  |  | 
// Drop the frame by restoring r1 from r31, then reload LR and r31 from the
// slots written in the prologue.
|  | mr 1, 31 | 
|  | ld 0, 16(1) | 
|  | ld 31, -8(1) | 
|  | mtlr 0 | 
|  | blr | 
|  |  | 
|  | .long	0 | 
|  | .quad	0 | 
|  | .Lfunc_end0: | 
|  | .size	__kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 | 
|  | .cfi_endproc | 
|  |  | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* KMP_ARCH_PPC64 */ | 
|  |  | 
|  | #if KMP_ARCH_RISCV64 | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // | 
|  | // typedef void (*microtask_t)(int *gtid, int *tid, ...); | 
|  | // | 
|  | // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, | 
|  | //                            void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                            , | 
|  | //                            void **exit_frame_ptr | 
|  | // #endif | 
|  | //                            ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)(&gtid, &tid, argv[0], ...); | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // Parameters: | 
|  | //   a0: pkfn | 
|  | //   a1: gtid | 
|  | //   a2: tid | 
|  | //   a3: argc | 
|  | //   a4: p_argv | 
|  | //   a5: exit_frame_ptr | 
|  | // | 
|  | // Locals: | 
|  | //   __gtid: gtid param pushed on stack so can pass &gtid to pkfn | 
|  | //   __tid: tid param pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // Temp. registers: | 
|  | // | 
|  | //  t0: used to calculate the dynamic stack size / used to hold pkfn address | 
|  | //  t1: used as temporary for stack placement calculation | 
|  | //  t2: used as temporary for stack arguments | 
|  | //  t3: used as temporary for number of remaining pkfn parms | 
|  | //  t4: used to traverse p_argv array | 
|  | // | 
|  | // return: a0 (always 1/TRUE) | 
|  | // | 
|  |  | 
|  | __gtid = -20 | 
|  | __tid = -24 | 
// The two symbols above are offsets from fp. The 16 bytes directly below fp
// hold the saved ra and the saved caller fp, so gtid and tid occupy the two
// 4-byte slots right underneath them.
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | .text | 
|  | .globl	__kmp_invoke_microtask | 
|  | .p2align	1 | 
|  | .type	__kmp_invoke_microtask,@function | 
|  | __kmp_invoke_microtask: | 
|  | .cfi_startproc | 
|  |  | 
|  | // First, save ra and fp | 
|  | addi	sp, sp, -16 | 
|  | sd	ra, 8(sp) | 
|  | sd	fp, 0(sp) | 
|  | addi	fp, sp, 16 | 
|  | .cfi_def_cfa	fp, 0 | 
|  | .cfi_offset	ra, -8 | 
|  | .cfi_offset	fp, -16 | 
|  |  | 
|  | // Compute the dynamic stack size: | 
|  | // | 
|  | // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by | 
|  | //   reference | 
|  | // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' | 
|  | //   function by register. Given that we have 8 of such registers (a[0-7]) | 
|  | //   and two + 'argc' arguments (consider &gtid and &tid), we need to | 
|  | //   reserve max(0, argc - 6)*8 extra bytes | 
|  | // | 
|  | // The total number of bytes is then max(0, argc - 6)*8 + 8 | 
|  |  | 
|  | // Compute max(0, argc - 6) using the following bithack: | 
|  | // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 | 
|  | // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax | 
|  | addi	t0, a3, -6 | 
|  | srai	t1, t0, 31 | 
|  | and	t1, t0, t1 | 
|  | sub	t0, t0, t1 | 
|  |  | 
// One more 8-byte slot for gtid and tid, then convert the slot count to bytes.
|  | addi	t0, t0, 1 | 
|  |  | 
|  | slli	t0, t0, 3 | 
|  | sub	sp, sp, t0 | 
|  |  | 
|  | // Align the stack to 16 bytes | 
|  | andi	sp, sp, -16 | 
|  |  | 
// Move the inputs out of the argument registers that are reused for the
// outgoing call: t0 = pkfn, t3 = argc, t4 = p_argv.
|  | mv	t0, a0 | 
|  | mv	t3, a3 | 
|  | mv	t4, a4 | 
|  |  | 
|  | #if OMPT_SUPPORT | 
|  | // Save frame pointer into exit_frame | 
|  | sd	fp, 0(a5) | 
|  | #endif | 
|  |  | 
|  | // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) | 
|  |  | 
|  | sw	a1, __gtid(fp) | 
|  | sw	a2, __tid(fp) | 
|  |  | 
|  | addi	a0, fp, __gtid | 
|  | addi	a1, fp, __tid | 
|  |  | 
// Load p_argv[0..5] into a2..a7, leaving for the call as soon as the
// remaining-argument count in t3 reaches zero.
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a2, 0(t4) | 
|  |  | 
|  | addi	t3, t3, -1 | 
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a3, 8(t4) | 
|  |  | 
|  | addi	t3, t3, -1 | 
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a4, 16(t4) | 
|  |  | 
|  | addi	t3, t3, -1 | 
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a5, 24(t4) | 
|  |  | 
|  | addi	t3, t3, -1 | 
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a6, 32(t4) | 
|  |  | 
|  | addi	t3, t3, -1 | 
|  | beqz	t3, .L_kmp_3 | 
|  | ld	a7, 40(t4) | 
|  |  | 
|  | // Prepare any additional argument passed through the stack | 
// t4 walks p_argv from element 6 onward, t1 walks the outgoing argument
// area upward from sp. The loop is entered at its test (.L_kmp_2).
|  | addi	t4, t4, 48 | 
|  | mv	t1, sp | 
|  | j .L_kmp_2 | 
|  | .L_kmp_1: | 
|  | ld	t2, 0(t4) | 
|  | sd	t2, 0(t1) | 
|  | addi	t4, t4, 8 | 
|  | addi	t1, t1, 8 | 
|  | .L_kmp_2: | 
|  | addi	t3, t3, -1 | 
|  | bnez	t3, .L_kmp_1 | 
|  |  | 
|  | .L_kmp_3: | 
|  | // Call pkfn function | 
|  | jalr	t0 | 
|  |  | 
|  | // Restore stack and return | 
|  |  | 
// Return value is always 1.
|  | addi	a0, zero, 1 | 
|  |  | 
// sp is recomputed from fp, which discards the dynamically sized area.
|  | addi	sp, fp, -16 | 
|  | ld	fp, 0(sp) | 
|  | ld	ra, 8(sp) | 
|  | addi	sp, sp, 16 | 
|  | ret | 
|  | .Lfunc_end0: | 
|  | .size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask | 
|  | .cfi_endproc | 
|  |  | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* KMP_ARCH_RISCV64 */ | 
|  |  | 
|  | #if KMP_ARCH_LOONGARCH64 | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // | 
|  | // typedef void (*microtask_t)(int *gtid, int *tid, ...); | 
|  | // | 
|  | // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, | 
|  | //                            void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                            , | 
|  | //                            void **exit_frame_ptr | 
|  | // #endif | 
|  | //                            ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)(&gtid, &tid, argv[0], ...); | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // Parameters: | 
|  | //   a0: pkfn | 
|  | //   a1: gtid | 
|  | //   a2: tid | 
|  | //   a3: argc | 
|  | //   a4: p_argv | 
|  | //   a5: exit_frame_ptr | 
|  | // | 
|  | // Locals: | 
|  | //   __gtid: gtid param pushed on stack so can pass &gtid to pkfn | 
|  | //   __tid: tid param pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // Temp registers: | 
|  | // | 
|  | //  t0: used to calculate the dynamic stack size / used to hold pkfn address | 
|  | //  t1: used as temporary for stack placement calculation | 
|  | //  t2: used as temporary for stack arguments | 
|  | //  t3: used as temporary for number of remaining pkfn parms | 
|  | //  t4: used to traverse p_argv array | 
|  | // | 
|  | // return: a0 (always 1/TRUE) | 
|  | // | 
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | .text | 
|  | .globl	__kmp_invoke_microtask | 
|  | .p2align	2 | 
|  | .type	__kmp_invoke_microtask,@function | 
|  | __kmp_invoke_microtask: | 
|  | .cfi_startproc | 
|  |  | 
|  | // First, save ra and fp | 
|  | addi.d	$sp, $sp, -16 | 
|  | st.d	$ra, $sp, 8 | 
|  | st.d	$fp, $sp, 0 | 
|  | addi.d	$fp, $sp, 16 | 
// The CFI directives use register numbers: 22 refers to $fp and 1 to $ra,
// matching the two stores above.
|  | .cfi_def_cfa	22, 0 | 
|  | .cfi_offset	1, -8 | 
|  | .cfi_offset	22, -16 | 
|  |  | 
|  | // Compute the dynamic stack size: | 
|  | // | 
|  | // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by | 
|  | //   reference | 
|  | // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' | 
|  | //   function by register. Given that we have 8 of such registers (a[0-7]) | 
|  | //   and two + 'argc' arguments (consider &gtid and &tid), we need to | 
|  | //   reserve max(0, argc - 6)*8 extra bytes | 
|  | // | 
|  | // The total number of bytes is then max(0, argc - 6)*8 + 8 | 
|  |  | 
// t1 = (argc - 6 < 0); masknez then zeroes t0 when t1 is set, which leaves
// max(0, argc - 6) in t0. One slot is added for gtid and tid and the slot
// count is converted to bytes.
|  | addi.d  $t0, $a3, -6 | 
|  | slt  $t1, $t0, $zero | 
|  | masknez  $t0, $t0, $t1 | 
|  | addi.d  $t0, $t0, 1 | 
|  | slli.d	$t0, $t0, 3 | 
|  | sub.d	$sp, $sp, $t0 | 
|  |  | 
|  | // Align the stack to 16 bytes | 
// (done by inserting zero into bits 3..0 of $sp)
|  | bstrins.d $sp, $zero, 3, 0 | 
|  |  | 
// Move the inputs out of the argument registers that are reused for the
// outgoing call: t0 = pkfn, t3 = argc, t4 = p_argv.
|  | move	$t0, $a0 | 
|  | move	$t3, $a3 | 
|  | move	$t4, $a4 | 
|  |  | 
|  | #if OMPT_SUPPORT | 
|  | // Save frame pointer into exit_frame | 
|  | st.d	$fp, $a5, 0 | 
|  | #endif | 
|  |  | 
|  | // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) | 
|  |  | 
// gtid is spilled at fp-20 and tid at fp-24, directly below the saved ra/fp
// pair; a0 and a1 then receive the addresses of those two slots.
|  | st.w	$a1, $fp, -20 | 
|  | st.w	$a2, $fp, -24 | 
|  |  | 
|  | addi.d	$a0, $fp, -20 | 
|  | addi.d	$a1, $fp, -24 | 
|  |  | 
// Load p_argv[0..5] into a2..a7, leaving for the call as soon as the
// remaining-argument count in t3 reaches zero.
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a2, $t4, 0 | 
|  |  | 
|  | addi.d	$t3, $t3, -1 | 
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a3, $t4, 8 | 
|  |  | 
|  | addi.d	$t3, $t3, -1 | 
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a4, $t4, 16 | 
|  |  | 
|  | addi.d	$t3, $t3, -1 | 
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a5, $t4, 24 | 
|  |  | 
|  | addi.d	$t3, $t3, -1 | 
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a6, $t4, 32 | 
|  |  | 
|  | addi.d	$t3, $t3, -1 | 
|  | beqz	$t3, .L_kmp_3 | 
|  | ld.d	$a7, $t4, 40 | 
|  |  | 
|  | // Prepare any additional argument passed through the stack | 
// t4 walks p_argv from element 6 onward, t1 walks the outgoing argument
// area upward from sp. The loop is entered at its test (.L_kmp_2).
|  | addi.d	$t4, $t4, 48 | 
|  | move	$t1, $sp | 
|  | b .L_kmp_2 | 
|  | .L_kmp_1: | 
|  | ld.d	$t2, $t4, 0 | 
|  | st.d	$t2, $t1, 0 | 
|  | addi.d	$t4, $t4, 8 | 
|  | addi.d	$t1, $t1, 8 | 
|  | .L_kmp_2: | 
|  | addi.d	$t3, $t3, -1 | 
|  | bnez	$t3, .L_kmp_1 | 
|  |  | 
|  | .L_kmp_3: | 
|  | // Call pkfn function | 
|  | jirl	$ra, $t0, 0 | 
|  |  | 
|  | // Restore stack and return | 
|  |  | 
// Return value is always 1.
|  | addi.d	$a0, $zero, 1 | 
|  |  | 
// sp is recomputed from fp, which discards the dynamically sized area.
|  | addi.d	$sp, $fp, -16 | 
|  | ld.d	$fp, $sp, 0 | 
|  | ld.d	$ra, $sp, 8 | 
|  | addi.d	$sp, $sp, 16 | 
|  | jr $ra | 
|  | .Lfunc_end0: | 
|  | .size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask | 
|  | .cfi_endproc | 
|  |  | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* KMP_ARCH_LOONGARCH64 */ | 
|  |  | 
|  | #if KMP_ARCH_VE | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // | 
|  | // typedef void (*microtask_t)(int *gtid, int *tid, ...); | 
|  | // | 
|  | // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, | 
|  | //                            void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                            , | 
|  | //                            void **exit_frame_ptr | 
|  | // #endif | 
|  | //                            ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)(&gtid, &tid, argv[0], ...); | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // Parameters: | 
|  | //   s0: pkfn | 
|  | //   s1: gtid | 
|  | //   s2: tid | 
|  | //   s3: argc | 
|  | //   s4: p_argv | 
|  | //   s5: exit_frame_ptr | 
|  | // | 
|  | // Locals: | 
|  | //   __gtid: gtid param pushed on stack so can pass &gtid to pkfn | 
|  | //   __tid: tid param pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // Temp. registers: | 
|  | // | 
|  | //  s34: used to calculate the dynamic stack size | 
|  | //  s35: used as temporary for stack placement calculation | 
|  | //  s36: used as temporary for stack arguments | 
|  | //  s37: used as temporary for number of remaining pkfn parms | 
|  | //  s38: used to traverse p_argv array | 
|  | // | 
|  | // return: s0 (always 1/TRUE) | 
|  | // | 
|  |  | 
|  | // Offsets, relative to %fp, of the 32-bit slots that hold the gtid and tid | 
|  | // copies whose addresses are passed to pkfn. | 
|  | __gtid = -4 | 
|  | __tid = -8 | 
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | // | 
|  | // VE trampoline.  Spills gtid (%s1) and tid (%s2) below %fp, writes the | 
|  | // complete pkfn argument list (&gtid, &tid, p_argv[0..argc-1]) to the | 
|  | // outgoing argument area at 176(%sp), additionally keeps the first eight | 
|  | // arguments in %s0-%s7, calls pkfn and returns 1 in %s0. | 
|  | .text | 
|  | .globl	__kmp_invoke_microtask | 
|  | // A function requires 8 bytes align. | 
|  | .p2align	3 | 
|  | .type	__kmp_invoke_microtask,@function | 
|  | __kmp_invoke_microtask: | 
|  | .cfi_startproc | 
|  |  | 
|  | // First, save fp and lr.  VE stores them at caller stack frame. | 
|  | // %fp is then set to the entry value of %sp, so the two saved values | 
|  | // are found at 0(%fp) and 8(%fp), as the CFI below records. | 
|  | st	%fp, 0(, %sp) | 
|  | st	%lr, 8(, %sp) | 
|  | or	%fp, 0, %sp | 
|  | .cfi_def_cfa	%fp, 0 | 
|  | .cfi_offset	%lr, 8 | 
|  | .cfi_offset	%fp, 0 | 
|  |  | 
|  | // Compute the dynamic stack size: | 
|  | // | 
|  | // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them | 
|  | //   by reference | 
|  | // - We need 8 bytes for each of the arguments.  We have two + 'argc' | 
|  | //   arguments (consider &gtid and &tid).  We need to reserve | 
|  | //   (argc + 2) * 8 bytes. | 
|  | // - We need 176 bytes for RSA and others | 
|  | // | 
|  | // The total number of bytes is then (argc + 2) * 8 + 8 + 176. | 
|  | // | 
|  | // |------------------------------| | 
|  | // | return address of callee     | 8(%fp) | 
|  | // |------------------------------| | 
|  | // | frame pointer of callee      | 0(%fp) | 
|  | // |------------------------------| <------------------ %fp | 
|  | // | __tid / __gtid               | -8(%fp) / -4(%fp) | 
|  | // |------------------------------| | 
|  | // | argc+2 for arguments         | 176(%sp) | 
|  | // |------------------------------| | 
|  | // | RSA                          | | 
|  | // |------------------------------| | 
|  | // | return address               | | 
|  | // |------------------------------| | 
|  | // | frame pointer                | | 
|  | // |------------------------------| <------------------ %sp | 
|  |  | 
|  | // s34 = (argc + 2) * 8 + 184, where 184 = 8 + 176 from the sum above. | 
|  | adds.w.sx	%s34, 2, %s3 | 
|  | sll	%s34, %s34, 3 | 
|  | lea	%s34, 184(, %s34) | 
|  | subs.l	%sp, %sp, %s34 | 
|  |  | 
|  | // Align the stack to 16 bytes. | 
|  | and	%sp, -16, %sp | 
|  |  | 
|  | // Save pkfn. | 
|  | // (%s0 is reused for the first pkfn argument below) | 
|  | or	%s12, 0, %s0 | 
|  |  | 
|  | // Call host to allocate stack if it is necessary. | 
|  | // The request is skipped when the new %sp is not below %sl. | 
|  | // NOTE(review): the ld/shm/monc sequence and the 0x13b code are presumably | 
|  | // the stack-extension request defined by the VE ABI -- confirm there. | 
|  | brge.l	%sp, %sl, .L_kmp_pass | 
|  | ld	%s61, 24(, %tp) | 
|  | lea	%s63, 0x13b | 
|  | shm.l	%s63, 0(%s61) | 
|  | shm.l	%sl, 8(%s61) | 
|  | shm.l	%sp, 16(%s61) | 
|  | monc | 
|  |  | 
|  | .L_kmp_pass: | 
|  | // s35 = start of the outgoing argument area, s37 = argc, s38 = p_argv. | 
|  | lea	%s35, 176(, %sp) | 
|  | adds.w.sx	%s37, 0, %s3 | 
|  | or	%s38, 0, %s4 | 
|  |  | 
|  | #if OMPT_SUPPORT | 
|  | // Save frame pointer into exit_frame. | 
|  | // (must happen before %s5 is reused for p_argv[3] below) | 
|  | st	%fp, 0(%s5) | 
|  | #endif | 
|  |  | 
|  | // Prepare arguments for the pkfn function (first 8 using s0-s7 | 
|  | // registers, but need to store stack also because of varargs). | 
|  |  | 
|  | // Spill gtid and tid as 32-bit values, then pass the addresses of the | 
|  | // two slots as arguments 0 and 1 (register and stack slot alike). | 
|  | stl	%s1, __gtid(%fp) | 
|  | stl	%s2, __tid(%fp) | 
|  |  | 
|  | adds.l	%s0, __gtid, %fp | 
|  | st	%s0, 0(, %s35) | 
|  | adds.l	%s1, __tid, %fp | 
|  | st	%s1, 8(, %s35) | 
|  |  | 
|  | // Each step below leaves for the call once argc entries have been | 
|  | // placed; otherwise it loads the next p_argv entry into the next | 
|  | // argument register and mirrors it into the matching stack slot. | 
|  | breq.l	0, %s37, .L_kmp_call | 
|  | ld	%s2, 0(, %s38) | 
|  | st	%s2, 16(, %s35) | 
|  |  | 
|  | breq.l	1, %s37, .L_kmp_call | 
|  | ld	%s3, 8(, %s38) | 
|  | st	%s3, 24(, %s35) | 
|  |  | 
|  | breq.l	2, %s37, .L_kmp_call | 
|  | ld	%s4, 16(, %s38) | 
|  | st	%s4, 32(, %s35) | 
|  |  | 
|  | breq.l	3, %s37, .L_kmp_call | 
|  | ld	%s5, 24(, %s38) | 
|  | st	%s5, 40(, %s35) | 
|  |  | 
|  | breq.l	4, %s37, .L_kmp_call | 
|  | ld	%s6, 32(, %s38) | 
|  | st	%s6, 48(, %s35) | 
|  |  | 
|  | breq.l	5, %s37, .L_kmp_call | 
|  | ld	%s7, 40(, %s38) | 
|  | st	%s7, 56(, %s35) | 
|  |  | 
|  | breq.l	6, %s37, .L_kmp_call | 
|  |  | 
|  | // Prepare any additional argument passed through the stack. | 
|  | // s37 becomes the number of entries left (argc - 6, at least 1 here), | 
|  | // s38 points at p_argv[6] and s35 at the stack slot of argument 8. | 
|  | adds.l	%s37, -6, %s37 | 
|  | lea	%s38, 48(, %s38) | 
|  | lea	%s35, 64(, %s35) | 
|  | .L_kmp_loop: | 
|  | ld	%s36, 0(, %s38) | 
|  | st	%s36, 0(, %s35) | 
|  | adds.l	%s37, -1, %s37 | 
|  | adds.l	%s38, 8, %s38 | 
|  | adds.l	%s35, 8, %s35 | 
|  | brne.l	0, %s37, .L_kmp_loop | 
|  |  | 
|  | .L_kmp_call: | 
|  | // Call pkfn function. | 
|  | // (through %s12, where the pkfn address was saved above) | 
|  | bsic	%lr, (, %s12) | 
|  |  | 
|  | // Return value. | 
|  | lea	%s0, 1 | 
|  |  | 
|  | // Restore stack and return. | 
|  | // %sp is taken back from %fp, so the dynamically sized area does not | 
|  | // have to be tracked; %lr and %fp are reloaded from the entry slots. | 
|  | or	%sp, 0, %fp | 
|  | ld	%lr, 8(, %sp) | 
|  | ld	%fp, 0(, %sp) | 
|  | b.l.t	(, %lr) | 
|  | .Lfunc_end0: | 
|  | .size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask | 
|  | .cfi_endproc | 
|  |  | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* KMP_ARCH_VE */ | 
|  |  | 
|  | #if KMP_ARCH_S390X | 
|  |  | 
|  | //------------------------------------------------------------------------ | 
|  | // | 
|  | // typedef void (*microtask_t)(int *gtid, int *tid, ...); | 
|  | // | 
|  | // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, | 
|  | //                            void *p_argv[] | 
|  | // #if OMPT_SUPPORT | 
|  | //                            , | 
|  | //                            void **exit_frame_ptr | 
|  | // #endif | 
|  | //                            ) { | 
|  | // #if OMPT_SUPPORT | 
|  | //   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); | 
|  | // #endif | 
|  | // | 
|  | //   (*pkfn)(&gtid, &tid, argv[0], ...); | 
|  | // | 
|  | //   return 1; | 
|  | // } | 
|  | // | 
|  | // Parameters: | 
|  | //   r2: pkfn | 
|  | //   r3: gtid | 
|  | //   r4: tid | 
|  | //   r5: argc | 
|  | //   r6: p_argv | 
|  | //   SP+160: exit_frame_ptr | 
|  | // | 
|  | // Locals: | 
|  | //   __gtid: gtid param pushed on stack so can pass &gtid to pkfn | 
|  | //   __tid: tid param pushed on stack so can pass &tid to pkfn | 
|  | // | 
|  | // Temp. registers: | 
|  | // | 
|  | //  r0: used to fetch argv slots | 
|  | //  r7: used as temporary for number of remaining pkfn parms | 
|  | //  r8: argv | 
|  | //  r9: pkfn | 
|  | //  r10: stack size | 
|  | //  r11: previous fp | 
|  | //  r12: stack parameter area | 
|  | //  r13: argv slot | 
|  | // | 
|  | // return: r2 (always 1/TRUE) | 
|  | // | 
|  |  | 
|  | // -- Begin __kmp_invoke_microtask | 
|  | // mark_begin; | 
|  | // | 
|  | // s390x trampoline.  Spills gtid (%r3) and tid (%r4) to the top of a newly | 
|  | // allocated frame so that their addresses can be handed to pkfn (%r2), | 
|  | // loads up to three entries of p_argv (%r6) into %r4-%r6, copies any | 
|  | // remaining entries to the outgoing parameter area at 160(%r15), calls pkfn | 
|  | // and returns 1 in %r2. | 
|  | .text | 
|  | .globl	__kmp_invoke_microtask | 
|  | .p2align	1 | 
|  | .type	__kmp_invoke_microtask,@function | 
|  | __kmp_invoke_microtask: | 
|  | .cfi_startproc | 
|  |  | 
|  | // Save r6-r15 at 48..127 off the incoming %r15.  %r15 is included so that | 
|  | // every .cfi_offset below, the one for %r15 among them, describes a slot | 
|  | // that has really been written (CFA-40 is 120 off the incoming %r15). | 
|  | stmg	%r6,%r15,48(%r15) | 
|  | .cfi_offset %r6, -112 | 
|  | .cfi_offset %r7, -104 | 
|  | .cfi_offset %r8, -96 | 
|  | .cfi_offset %r9, -88 | 
|  | .cfi_offset %r10, -80 | 
|  | .cfi_offset %r11, -72 | 
|  | .cfi_offset %r12, -64 | 
|  | .cfi_offset %r13, -56 | 
|  | .cfi_offset %r14, -48 | 
|  | .cfi_offset %r15, -40 | 
|  | // %r11 keeps the incoming %r15 for the whole function; the CFA is | 
|  | // expressed relative to it because %r15 moves by a dynamic amount below. | 
|  | lgr	%r11,%r15 | 
|  | .cfi_def_cfa %r11, 160 | 
|  |  | 
|  | // Compute the dynamic stack size: | 
|  | // | 
|  | // - We need 16 bytes for storing 'gtid' and 'tid' (one 8-byte slot each), | 
|  | //   so we can pass them by reference | 
|  | // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' | 
|  | //   function by register. Given that we have 5 of such registers (r[2-6]) | 
|  | //   and two + 'argc' arguments (consider &gtid and &tid), max(0, argc - 3) | 
|  | //   slots would suffice; the code below reserves max(0, argc - 2) slots, | 
|  | //   i.e. one spare slot whenever argc >= 3 | 
|  | // - We need 160 bytes below the outgoing parameter area, which starts at | 
|  | //   160(%r15) | 
|  | // | 
|  | // The total number of bytes is then max(0, argc - 2)*8 + 176 | 
|  |  | 
|  | lgr	%r10,%r5 | 
|  | aghi	%r10,-2 | 
|  | jnm	0f			// keep argc - 2 unless it is negative | 
|  | lghi	%r10,0 | 
|  | 0: | 
|  | sllg	%r10,%r10,3 | 
|  | lgr	%r12,%r10		// size of the outgoing parameter area | 
|  | aghi	%r10,176 | 
|  | sgr 	%r15,%r10		// allocate the frame | 
|  | agr	%r12,%r15		// 160(%r12) = first byte above the parameter area | 
|  | stg	%r11,0(%r15)		// store the incoming %r15 at the bottom of the frame | 
|  |  | 
|  | lgr	%r9,%r2			// pkfn | 
|  |  | 
|  | #if OMPT_SUPPORT | 
|  | // Save frame pointer into exit_frame | 
|  | // exit_frame_ptr is the stack argument at 160 off the incoming %r15; | 
|  | // %r8 is only a scratch register here and is reloaded with argv below. | 
|  | lg	%r8,160(%r11) | 
|  | stg	%r11,0(%r8) | 
|  | #endif | 
|  |  | 
|  | // Prepare arguments for the pkfn function (first 5 using r2-r6 registers) | 
|  |  | 
|  | // gtid and tid are stored as full 8-byte registers; the address passed to | 
|  | // pkfn is that of the second word of each slot (offset +4), which holds | 
|  | // the low-order 32 bits, i.e. the int value. | 
|  | stg     %r3,160(%r12) | 
|  | la	%r2,164(%r12)		// &gtid | 
|  | stg	%r4,168(%r12) | 
|  | la	%r3,172(%r12)		// &tid | 
|  | lgr	%r8,%r6			// argv | 
|  |  | 
|  | // %r7 counts the argv entries that are still to be placed; every step | 
|  | // below leaves for the call as soon as the count reaches zero. | 
|  |  | 
|  | // If argc > 0 | 
|  | ltgr	%r7,%r5 | 
|  | jz	1f | 
|  |  | 
|  | lg	%r4,0(%r8)		// argv[0] | 
|  | aghi	%r7,-1 | 
|  | jz	1f | 
|  |  | 
|  | // If argc > 1 | 
|  | lg	%r5,8(%r8)		// argv[1] | 
|  | aghi	%r7,-1 | 
|  | jz	1f | 
|  |  | 
|  | // If argc > 2 | 
|  | lg	%r6,16(%r8)		// argv[2] | 
|  | aghi	%r7,-1 | 
|  | jz	1f | 
|  |  | 
|  | // If argc > 3: copy argv[3..argc-1] to the outgoing parameter area. | 
|  | lghi	%r13,0			// Byte offset of slot [n] | 
|  | 2: | 
|  | lg	%r0,24(%r13,%r8)	// argv[3+n] | 
|  | stg	%r0,160(%r13,%r15)	// stack parm[n] | 
|  | aghi	%r13,8			// Next | 
|  | aghi	%r7,-1 | 
|  | jnz	2b | 
|  |  | 
|  | 1: | 
|  | basr	%r14,%r9		// Call pkfn | 
|  |  | 
|  | // Restore stack and return | 
|  |  | 
|  | // %r15 is taken back from %r11 first, so the reload below addresses the | 
|  | // same save area that was filled on entry. | 
|  | lgr	%r15,%r11 | 
|  | lmg	%r6,%r14,48(%r15) | 
|  | lghi	%r2,1			// Return value is always 1 | 
|  | br	%r14 | 
|  | .Lfunc_end0: | 
|  | .size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask | 
|  | .cfi_endproc | 
|  |  | 
|  | // -- End  __kmp_invoke_microtask | 
|  |  | 
|  | #endif /* KMP_ARCH_S390X */ | 
|  |  | 
|  | // 32-bit targets: define the global __kmp_unnamed_critical_addr as a 4-byte | 
|  | // object in .data that is initialised with the address of | 
|  | // .gomp_critical_user_. | 
|  | #if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32 | 
|  | // Fall back to an identity mapping when no symbol-prefix macro is defined. | 
|  | #ifndef KMP_PREFIX_UNDERSCORE | 
|  | # define KMP_PREFIX_UNDERSCORE(x) x | 
|  | #endif | 
|  | .data | 
|  | // COMMON is a macro defined outside this section. | 
|  | // NOTE(review): its operands are presumably (symbol, size, alignment) -- | 
|  | // confirm against the macro definition. | 
|  | COMMON .gomp_critical_user_, 32, 3 | 
|  | // NOTE(review): .data is selected again, presumably because COMMON may | 
|  | // leave another section active -- confirm. | 
|  | .data | 
|  | // NOTE(review): whether the .align operand is a byte count or a power of | 
|  | // two depends on the target assembler -- confirm the intended alignment. | 
|  | .align 4 | 
|  | .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) | 
|  | KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): | 
|  | .4byte .gomp_critical_user_ | 
|  | #ifdef __ELF__ | 
|  | .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object | 
|  | .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 | 
|  | #endif | 
|  | #endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32 */ | 
|  |  | 
|  | // 64-bit targets: define the global __kmp_unnamed_critical_addr as an 8-byte | 
|  | // object in .data that is initialised with the address of | 
|  | // .gomp_critical_user_. | 
|  | #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||                   \ | 
|  | KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||                 \ | 
|  | KMP_ARCH_S390X || KMP_ARCH_SPARC64 | 
|  | // Fall back to an identity mapping when no symbol-prefix macro is defined. | 
|  | #ifndef KMP_PREFIX_UNDERSCORE | 
|  | # define KMP_PREFIX_UNDERSCORE(x) x | 
|  | #endif | 
|  | .data | 
|  | // COMMON is a macro defined outside this section. | 
|  | // NOTE(review): its operands are presumably (symbol, size, alignment) -- | 
|  | // confirm against the macro definition. | 
|  | COMMON .gomp_critical_user_, 32, 3 | 
|  | // NOTE(review): .data is selected again, presumably because COMMON may | 
|  | // leave another section active -- confirm. | 
|  | .data | 
|  | // NOTE(review): whether the .align operand is a byte count or a power of | 
|  | // two depends on the target assembler -- confirm the intended alignment. | 
|  | .align 8 | 
|  | .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) | 
|  | KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): | 
|  | .8byte .gomp_critical_user_ | 
|  | #ifdef __ELF__ | 
|  | .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object | 
|  | .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 | 
|  | #endif | 
|  | #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || | 
|  | KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || | 
|  | KMP_ARCH_S390X || KMP_ARCH_SPARC64 */ | 
|  |  | 
|  | // On Linux, emit an empty .note.GNU-stack section with no flags (nothing is | 
|  | // emitted for WASM).  The ARM/AArch64 branch spells the section type as | 
|  | // %progbits instead of @progbits; the directive is otherwise the same. | 
|  | // NOTE(review): by GNU toolchain convention such a section marks the object | 
|  | // as not needing an executable stack -- confirm for the linkers in use. | 
|  | #if KMP_OS_LINUX | 
|  | # if KMP_ARCH_ARM || KMP_ARCH_AARCH64 | 
|  | .section .note.GNU-stack,"",%progbits | 
|  | # elif !KMP_ARCH_WASM | 
|  | .section .note.GNU-stack,"",@progbits | 
|  | # endif | 
|  | #endif | 
|  |  | 
|  | // GNU_PROPERTY_BTI_PAC is a macro defined outside this section. | 
|  | // NOTE(review): presumably it emits the GNU property note that advertises | 
|  | // BTI/PAC support -- confirm against its definition. | 
|  | #if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) | 
|  | GNU_PROPERTY_BTI_PAC | 
|  | #endif | 