| // z_Linux_asm.S: - microtasking routines specifically |
| // written for Intel platforms running Linux* OS |
| |
| // |
| ////===----------------------------------------------------------------------===// |
| //// |
| //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| //// See https://llvm.org/LICENSE.txt for license information. |
| //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| //// |
| ////===----------------------------------------------------------------------===// |
| // |
| |
| // ----------------------------------------------------------------------- |
| // macros |
| // ----------------------------------------------------------------------- |
| |
| #include "kmp_config.h" |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| # if KMP_MIC |
| // the 'delay r16/r32/r64' should be used instead of the 'pause'. |
| // The delay operation has the effect of removing the current thread from |
| // the round-robin HT mechanism, and therefore speeds up the issue rate of |
| // the other threads on the same core. |
| // |
| // A value of 0 works fine for <= 2 threads per core, but causes the EPCC |
| // barrier time to increase greatly for 3 or more threads per core. |
| // |
| // A value of 100 works pretty well for up to 4 threads per core, but isn't |
| // quite as fast as 0 for 2 threads per core. |
| // |
| // We need to check what happens for oversubscription / > 4 threads per core. |
| // It is possible that we need to pass the delay value in as a parameter |
| // that the caller determines based on the total # threads / # cores. |
| // |
| //.macro pause_op |
| // mov $100, %rax |
| // delay %rax |
| //.endm |
| # else |
| # define pause_op .byte 0xf3,0x90 |
| # endif // KMP_MIC |
| |
| # if KMP_OS_DARWIN |
| // Darwin flavour of the helper macros. Macro arguments are referenced |
| // positionally ($0) inside the macro bodies below. |
| # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols |
| # define KMP_LABEL(x) L_##x // form the name of label |
| // The KMP_CFI_* macros are deliberately empty in this branch, so their uses |
| // inside the routines expand to nothing on Darwin. |
| .macro KMP_CFI_DEF_OFFSET |
| .endmacro |
| .macro KMP_CFI_OFFSET |
| .endmacro |
| .macro KMP_CFI_REGISTER |
| .endmacro |
| .macro KMP_CFI_DEF |
| .endmacro |
| // ALIGN n: forwards its argument unchanged to the .align directive. |
| .macro ALIGN |
| .align $0 |
| .endmacro |
| // DEBUG_INFO proc: emits nothing in this branch (body is only a comment). |
| .macro DEBUG_INFO |
| /* Not sure what .size does in icc, not sure if we need to do something |
| similar for OS X*. |
| */ |
| .endmacro |
| // PROC name: aligns (ALIGN 4), exports and defines the label |
| // KMP_PREFIX_UNDERSCORE(name), i.e. the name with a leading underscore. |
| .macro PROC |
| ALIGN 4 |
| .globl KMP_PREFIX_UNDERSCORE($0) |
| KMP_PREFIX_UNDERSCORE($0): |
| .endmacro |
| # else // KMP_OS_DARWIN |
| # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols |
| // Format labels so that they don't override function names in gdb's backtraces. |
| // The MIC assembler doesn't accept the .L syntax; a plain L prefix works there |
| // (as well as on OS X*). |
| # if KMP_MIC |
| # define KMP_LABEL(x) L_##x // local label |
| # else |
| # define KMP_LABEL(x) .L_##x // local label hidden from backtraces |
| # endif // KMP_MIC |
| // ALIGN size: "size" is a log2 value; the macro emits .align (1 << size). |
| .macro ALIGN size |
| .align 1<<(\size) |
| .endm |
| // DEBUG_INFO proc: closes the CFI region opened by PROC and emits the |
| // .type/.size records for "proc". The .align 16 comes before .size, so the |
| // size expression (. - proc) is evaluated after that padding. |
| .macro DEBUG_INFO proc |
| .cfi_endproc |
| // Not sure why we need .type and .size for the functions |
| .align 16 |
| .type \proc,@function |
| .size \proc,.-\proc |
| .endm |
| // PROC proc: aligns (ALIGN 4 -> .align 16), exports and defines the label |
| // for "proc", then opens a CFI region with .cfi_startproc. |
| .macro PROC proc |
| ALIGN 4 |
| .globl KMP_PREFIX_UNDERSCORE(\proc) |
| KMP_PREFIX_UNDERSCORE(\proc): |
| .cfi_startproc |
| .endm |
| // Thin wrappers around the corresponding .cfi_* directives. |
| .macro KMP_CFI_DEF_OFFSET sz |
| .cfi_def_cfa_offset \sz |
| .endm |
| .macro KMP_CFI_OFFSET reg, sz |
| .cfi_offset \reg,\sz |
| .endm |
| .macro KMP_CFI_REGISTER reg |
| .cfi_def_cfa_register \reg |
| .endm |
| .macro KMP_CFI_DEF reg, sz |
| .cfi_def_cfa \reg,\sz |
| .endm |
| # endif // KMP_OS_DARWIN |
| #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 |
| |
| # if KMP_OS_DARWIN |
| // Darwin flavour of the AArch64 helper macros; arguments are referenced |
| // positionally ($0) inside the macro bodies. |
| # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols |
| # define KMP_LABEL(x) L_##x // form the name of label |
| |
| // ALIGN n: forwards its argument unchanged to the .align directive. |
| .macro ALIGN |
| .align $0 |
| .endmacro |
| |
| // DEBUG_INFO proc: emits nothing in this branch (body is only a comment). |
| .macro DEBUG_INFO |
| /* Not sure what .size does in icc, not sure if we need to do something |
| similar for OS X*. |
| */ |
| .endmacro |
| |
| // PROC name: aligns (ALIGN 4), exports and defines the underscore-prefixed |
| // label for "name". |
| .macro PROC |
| ALIGN 4 |
| .globl KMP_PREFIX_UNDERSCORE($0) |
| KMP_PREFIX_UNDERSCORE($0): |
| .endmacro |
| # else // KMP_OS_DARWIN |
| # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols |
| // Format labels so that they don't override function names in gdb's backtraces |
| # define KMP_LABEL(x) .L_##x // local label hidden from backtraces |
| |
| // ALIGN size: emits .align (1 << size). |
| // NOTE(review): GNU as documents the .align operand as a power of two on ARM |
| // targets; if that also applies here, ALIGN 2 requests 2^4 = 16 bytes rather |
| // than 4 -- confirm that this is the intended alignment. |
| .macro ALIGN size |
| .align 1<<(\size) |
| .endm |
| |
| // DEBUG_INFO proc: closes the CFI region opened by PROC and emits the |
| // .type/.size records for "proc" (after the ALIGN 2 padding). |
| .macro DEBUG_INFO proc |
| .cfi_endproc |
| // Not sure why we need .type and .size for the functions |
| ALIGN 2 |
| .type \proc,@function |
| .size \proc,.-\proc |
| .endm |
| |
| // PROC proc: aligns (ALIGN 2), exports and defines the label for "proc", |
| // then opens a CFI region with .cfi_startproc. |
| .macro PROC proc |
| ALIGN 2 |
| .globl KMP_PREFIX_UNDERSCORE(\proc) |
| KMP_PREFIX_UNDERSCORE(\proc): |
| .cfi_startproc |
| .endm |
| # endif // KMP_OS_DARWIN |
| |
| #endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 |
| |
| // ----------------------------------------------------------------------- |
| // data |
| // ----------------------------------------------------------------------- |
| |
| #ifdef KMP_GOMP_COMPAT |
| |
| // Support for unnamed common blocks. |
| // |
| // Because the symbol ".gomp_critical_user_" contains a ".", we have to |
| // put this stuff in assembly. |
| |
| // IA-32: reserve the 32-byte common block ".gomp_critical_user_" and export |
| // a 4-byte pointer to it under the C-visible name __kmp_unnamed_critical_addr. |
| # if KMP_ARCH_X86 |
| # if KMP_OS_DARWIN |
| .data |
| .comm .gomp_critical_user_,32 |
| .data |
| .globl ___kmp_unnamed_critical_addr |
| ___kmp_unnamed_critical_addr: |
| .long .gomp_critical_user_ |
| # else /* Linux* OS */ |
| .data |
| .comm .gomp_critical_user_,32,8 |
| .data |
| // ALIGN 4 expands to .align 1<<(4), i.e. .align 16. |
| ALIGN 4 |
| .global __kmp_unnamed_critical_addr |
| __kmp_unnamed_critical_addr: |
| .4byte .gomp_critical_user_ |
| .type __kmp_unnamed_critical_addr,@object |
| .size __kmp_unnamed_critical_addr,4 |
| # endif /* KMP_OS_DARWIN */ |
| # endif /* KMP_ARCH_X86 */ |
| |
| // Intel(R) 64: reserve the 32-byte common block ".gomp_critical_user_" and |
| // export an 8-byte pointer to it as __kmp_unnamed_critical_addr. |
| # if KMP_ARCH_X86_64 |
| # if KMP_OS_DARWIN |
| .data |
| .comm .gomp_critical_user_,32 |
| .data |
| .globl ___kmp_unnamed_critical_addr |
| ___kmp_unnamed_critical_addr: |
| .quad .gomp_critical_user_ |
| # else /* Linux* OS */ |
| .data |
| .comm .gomp_critical_user_,32,8 |
| .data |
| // ALIGN 8 expands to .align 1<<(8), i.e. .align 256. |
| // NOTE(review): the object is only 8 bytes wide; ALIGN 3 (.align 8) may have |
| // been intended -- confirm before changing, since it alters data layout. |
| ALIGN 8 |
| .global __kmp_unnamed_critical_addr |
| __kmp_unnamed_critical_addr: |
| .8byte .gomp_critical_user_ |
| .type __kmp_unnamed_critical_addr,@object |
| .size __kmp_unnamed_critical_addr,8 |
| # endif /* KMP_OS_DARWIN */ |
| # endif /* KMP_ARCH_X86_64 */ |
| |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| |
| #if KMP_ARCH_X86 && !KMP_ARCH_PPC64 |
| |
| // ----------------------------------------------------------------------- |
| // microtasking routines specifically written for IA-32 architecture |
| // running Linux* OS |
| // ----------------------------------------------------------------------- |
| |
| .ident "Intel Corporation" |
| .data |
| ALIGN 4 |
| // void |
| // __kmp_x86_pause( void ); |
| // |
| // Executes one pause_op (defined at the top of this file as the byte |
| // sequence 0xf3,0x90 when KMP_MIC is not set) and returns. |
| |
| .text |
| PROC __kmp_x86_pause |
| |
| pause_op |
| ret |
| |
| DEBUG_INFO __kmp_x86_pause |
| |
| # if !KMP_ASM_INTRINS |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_test_then_add32 |
| // |
| // kmp_int32 |
| // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); |
| // |
| // Atomic fetch-and-add: *p += d, returning the value *p held beforehand. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // d: 8(%esp) |
| // |
| // return: %eax |
| PROC __kmp_test_then_add32 |
| |
| movl 8(%esp), %eax // "d" |
| movl 4(%esp), %ecx // "p" |
| lock |
| xaddl %eax, (%ecx) // *p += d; %eax <- previous *p |
| ret |
| |
| DEBUG_INFO __kmp_test_then_add32 |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed8 |
| // |
| // kmp_int8 |
| // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); |
| // |
| // Atomically swaps the byte at *p with "d" and returns the old byte. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // d: 8(%esp) |
| // |
| // return: %al (the rest of %eax is not written) |
| PROC __kmp_xchg_fixed8 |
| |
| movb 8(%esp), %al // "d" |
| movl 4(%esp), %ecx // "p" |
| |
| lock |
| xchgb %al, (%ecx) // %al <-> *p |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed8 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed16 |
| // |
| // kmp_int16 |
| // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); |
| // |
| // Atomically swaps the 16-bit value at *p with "d" and returns the old one. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // d: 8(%esp) |
| // |
| // return: %ax (the upper half of %eax is not written) |
| PROC __kmp_xchg_fixed16 |
| |
| movw 8(%esp), %ax // "d" |
| movl 4(%esp), %ecx // "p" |
| |
| lock |
| xchgw %ax, (%ecx) // %ax <-> *p |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed16 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed32 |
| // |
| // kmp_int32 |
| // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); |
| // |
| // Atomically swaps the 32-bit value at *p with "d" and returns the old one. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // d: 8(%esp) |
| // |
| // return: %eax |
| PROC __kmp_xchg_fixed32 |
| |
| movl 8(%esp), %eax // "d" |
| movl 4(%esp), %ecx // "p" |
| |
| lock |
| xchgl %eax, (%ecx) // %eax <-> *p |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed32 |
| |
| |
| // kmp_int8 |
| // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %eax (0 or 1) |
| PROC __kmp_compare_and_store8 |
| |
| movb 12(%esp), %dl // "sv" |
| movb 8(%esp), %al // "cv" (cmpxchg compares against %al) |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgb %dl, (%ecx) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result to all of %eax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store8 |
| |
| // kmp_int16 |
| // __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %eax (0 or 1) |
| PROC __kmp_compare_and_store16 |
| |
| movw 12(%esp), %dx // "sv" |
| movw 8(%esp), %ax // "cv" (cmpxchg compares against %ax) |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgw %dx, (%ecx) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result to all of %eax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store16 |
| |
| // kmp_int32 |
| // __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %eax (0 or 1) |
| PROC __kmp_compare_and_store32 |
| |
| movl 12(%esp), %edx // "sv" |
| movl 8(%esp), %eax // "cv" (cmpxchg compares against %eax) |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgl %edx, (%ecx) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result to all of %eax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store32 |
| |
| // kmp_int32 |
| // __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters (relative to %ebp once the frame is set up): |
| // p: 8(%ebp) |
| // cv: 12(%ebp) low word, 16(%ebp) high word |
| // sv: 20(%ebp) low word, 24(%ebp) high word |
| // |
| // return: %eax (0 or 1) |
| PROC __kmp_compare_and_store64 |
| |
| pushl %ebp |
| movl %esp, %ebp |
| pushl %ebx // callee-saved; cmpxchg8b takes "sv" in %ecx:%ebx |
| pushl %edi // callee-saved; used for "p" |
| movl 24(%ebp), %ecx // "sv" high order word |
| movl 20(%ebp), %ebx // "sv" low order word |
| movl 16(%ebp), %edx // "cv" high order word |
| movl 12(%ebp), %eax // "cv" low order word |
| movl 8(%ebp), %edi // "p" |
| lock |
| cmpxchg8b (%edi) // ZF = 1 iff *p == %edx:%eax, and then *p <- %ecx:%ebx |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result to all of %eax |
| popl %edi |
| popl %ebx |
| leave // %esp <- %ebp; pop %ebp |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store64 |
| |
| // kmp_int8 |
| // __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns the value that *p |
| // held before the operation. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %al |
| PROC __kmp_compare_and_store_ret8 |
| |
| movb 12(%esp), %dl // "sv" |
| movb 8(%esp), %al // "cv" |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgb %dl, (%ecx) // on mismatch %al <- *p; on match %al already equals it |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret8 |
| |
| // kmp_int16 |
| // __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, |
| // kmp_int16 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns the value that *p |
| // held before the operation. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %ax |
| PROC __kmp_compare_and_store_ret16 |
| |
| movw 12(%esp), %dx // "sv" |
| movw 8(%esp), %ax // "cv" |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgw %dx, (%ecx) // on mismatch %ax <- *p; on match %ax already equals it |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret16 |
| |
| // kmp_int32 |
| // __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, |
| // kmp_int32 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns the value that *p |
| // held before the operation. |
| // |
| // parameters: |
| // p: 4(%esp) |
| // cv: 8(%esp) |
| // sv: 12(%esp) |
| // |
| // return: %eax |
| PROC __kmp_compare_and_store_ret32 |
| |
| movl 12(%esp), %edx // "sv" |
| movl 8(%esp), %eax // "cv" |
| movl 4(%esp), %ecx // "p" |
| lock |
| cmpxchgl %edx, (%ecx) // on mismatch %eax <- *p; on match %eax already equals it |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret32 |
| |
| // kmp_int64 |
| // __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, |
| // kmp_int64 sv); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns the value that *p |
| // held before the operation. |
| // |
| // parameters (relative to %ebp once the frame is set up): |
| // p: 8(%ebp) |
| // cv: 12(%ebp) low word, 16(%ebp) high word |
| // sv: 20(%ebp) low word, 24(%ebp) high word |
| // |
| // return: %edx:%eax |
| PROC __kmp_compare_and_store_ret64 |
| |
| pushl %ebp |
| movl %esp, %ebp |
| pushl %ebx // callee-saved; cmpxchg8b takes "sv" in %ecx:%ebx |
| pushl %edi // callee-saved; used for "p" |
| movl 24(%ebp), %ecx // "sv" high order word |
| movl 20(%ebp), %ebx // "sv" low order word |
| movl 16(%ebp), %edx // "cv" high order word |
| movl 12(%ebp), %eax // "cv" low order word |
| movl 8(%ebp), %edi // "p" |
| lock |
| cmpxchg8b (%edi) // on mismatch %edx:%eax <- *p |
| popl %edi |
| popl %ebx |
| leave // %esp <- %ebp; pop %ebp |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret64 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_real32 |
| // |
| // kmp_real32 |
| // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); |
| // |
| // Atomically stores "data" into *addr and returns the value that the |
| // exchange itself displaced. |
| // |
| // parameters: |
| // addr: 4(%esp) on entry, i.e. 8(%ebp) once %ebp has been pushed |
| // data: 8(%esp) on entry, i.e. 12(%ebp) once %ebp has been pushed |
| // |
| // return: st(0) (loaded with flds below) |
| PROC __kmp_xchg_real32 |
| |
| pushl %ebp |
| movl %esp, %ebp |
| subl $4, %esp // scratch slot at -4(%ebp) |
| pushl %esi |
| |
| // 0(%ebp) is the saved %ebp and 4(%ebp) the return address, so the |
| // arguments start at 8(%ebp). |
| movl 8(%ebp), %esi // "addr" |
| movl 12(%ebp), %eax // raw bits of "data" |
| |
| lock |
| xchgl %eax, (%esi) // *addr <- data; %eax <- old bits |
| |
| // Move the old bits through memory onto the x87 stack. This is the only |
| // x87 load, so exactly one x87 register is occupied at return. |
| movl %eax, -4(%ebp) |
| flds -4(%ebp) |
| // return old value |
| |
| popl %esi |
| movl %ebp, %esp |
| popl %ebp |
| ret |
| |
| DEBUG_INFO __kmp_xchg_real32 |
| |
| # endif /* !KMP_ASM_INTRINS */ |
| |
| //------------------------------------------------------------------------ |
| // int |
| // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), |
| // int gtid, int tid, |
| // int argc, void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)( & gtid, & tid, argv[0], ... ); |
| // return 1; |
| // } |
| // |
| // Argument offsets used below, relative to %ebp after the frame is set up: |
| // pkfn: 8(%ebp) gtid: 12(%ebp) tid: 16(%ebp) |
| // argc: 20(%ebp) p_argv: 24(%ebp) exit_frame_ptr: 28(%ebp) |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| PROC __kmp_invoke_microtask |
| |
| pushl %ebp |
| KMP_CFI_DEF_OFFSET 8 |
| KMP_CFI_OFFSET ebp,-8 |
| movl %esp,%ebp // establish the base pointer for this routine. |
| KMP_CFI_REGISTER ebp |
| subl $8,%esp // allocate space for two local variables. |
| // These variables are: |
| // argv: -4(%ebp) |
| // temp: -8(%ebp) (not referenced in this routine) |
| // |
| pushl %ebx // save %ebx to use during this routine; it lands at -12(%ebp) |
| // |
| #if OMPT_SUPPORT |
| movl 28(%ebp),%ebx // get exit_frame address |
| movl %ebp,(%ebx) // save exit_frame |
| #endif |
| |
| movl 20(%ebp),%ebx // Stack alignment - # args |
| addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) |
| shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 |
| movl %esp,%eax // |
| subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this |
| movl %eax,%ebx // Save to %ebx |
| andl $0xFFFFFF80,%eax // mask off 7 bits |
| subl %eax,%ebx // Amount to subtract from %esp |
| subl %ebx,%esp // Prepare the stack ptr -- |
| // now it will be aligned on 128-byte boundary at the call |
| |
| movl 24(%ebp),%eax // copy from p_argv[] |
| movl %eax,-4(%ebp) // into the local variable *argv. |
| |
| movl 20(%ebp),%ebx // argc is 20(%ebp) |
| shll $2,%ebx // %ebx = argc*4, byte offset just past the last argv entry |
| |
| // Loop: push argv[argc-1] .. argv[0], walking %ebx down to zero in steps of 4. |
| KMP_LABEL(invoke_2): |
| cmpl $0,%ebx |
| jg KMP_LABEL(invoke_4) |
| jmp KMP_LABEL(invoke_3) |
| ALIGN 2 |
| KMP_LABEL(invoke_4): |
| movl -4(%ebp),%eax |
| subl $4,%ebx // step back one argv slot (4 bytes). |
| addl %ebx,%eax // index into argv. |
| movl (%eax),%edx |
| pushl %edx |
| |
| jmp KMP_LABEL(invoke_2) |
| ALIGN 2 |
| KMP_LABEL(invoke_3): |
| leal 16(%ebp),%eax // push & tid |
| pushl %eax |
| |
| leal 12(%ebp),%eax // push & gtid |
| pushl %eax |
| |
| movl 8(%ebp),%ebx |
| call *%ebx // call (*pkfn)(); |
| |
| movl $1,%eax // return 1; |
| |
| movl -12(%ebp),%ebx // restore %ebx |
| leave |
| KMP_CFI_DEF esp,4 |
| ret |
| |
| DEBUG_INFO __kmp_invoke_microtask |
| // -- End __kmp_invoke_microtask |
| |
| |
| // kmp_uint64 |
| // __kmp_hardware_timestamp(void) |
| // |
| // Returns the time-stamp counter exactly as rdtsc delivers it, i.e. in |
| // %edx:%eax (high:low). |
| PROC __kmp_hardware_timestamp |
| rdtsc |
| ret |
| |
| DEBUG_INFO __kmp_hardware_timestamp |
| // -- End __kmp_hardware_timestamp |
| |
| #endif /* KMP_ARCH_X86 */ |
| |
| |
| #if KMP_ARCH_X86_64 |
| |
| // ----------------------------------------------------------------------- |
| // microtasking routines specifically written for Intel(R) 64 |
| // (this section is assembled only when KMP_ARCH_X86_64 is set) |
| // ----------------------------------------------------------------------- |
| |
| // -- Machine type P |
| // mark_description "Intel Corporation"; |
| .ident "Intel Corporation" |
| // -- .file "z_Linux_asm.S" |
| .data |
| ALIGN 4 |
| |
| // The directives above leave the assembler in the .data section. To keep code |
| // out of .data, every x86_64 routine below is preceded by its own .text. |
| //------------------------------------------------------------------------ |
| # if !KMP_ASM_INTRINS |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_test_then_add32 |
| // |
| // kmp_int32 |
| // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); |
| // |
| // Atomic fetch-and-add: *p += d, returning the value *p held beforehand. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %esi (overwritten; caller-saved) |
| // |
| // return: %eax |
| .text |
| PROC __kmp_test_then_add32 |
| |
| lock |
| xaddl %esi, (%rdi) // *p += d; %esi <- previous *p |
| movl %esi, %eax // hand the previous value back |
| ret |
| |
| DEBUG_INFO __kmp_test_then_add32 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_test_then_add64 |
| // |
| // kmp_int64 |
| // __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); |
| // |
| // Atomic fetch-and-add: *p += d, returning the value *p held beforehand. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %rsi (overwritten; caller-saved) |
| // |
| // return: %rax |
| .text |
| PROC __kmp_test_then_add64 |
| |
| lock |
| xaddq %rsi, (%rdi) // *p += d; %rsi <- previous *p |
| movq %rsi, %rax // hand the previous value back |
| ret |
| |
| DEBUG_INFO __kmp_test_then_add64 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed8 |
| // |
| // kmp_int8 |
| // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); |
| // |
| // Atomically swaps the byte at *p with "d" and returns the old byte. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %sil (overwritten; caller-saved) |
| // |
| // return: %al (the rest of %rax is not written) |
| .text |
| PROC __kmp_xchg_fixed8 |
| |
| lock |
| xchgb %sil, (%rdi) // %sil <-> *p |
| |
| movb %sil, %al // old byte -> return register |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed8 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed16 |
| // |
| // kmp_int16 |
| // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); |
| // |
| // Atomically swaps the 16-bit value at *p with "d" and returns the old one. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %si (overwritten; caller-saved) |
| // |
| // return: %ax (the rest of %rax is not written) |
| .text |
| PROC __kmp_xchg_fixed16 |
| |
| lock |
| xchgw %si, (%rdi) // %si <-> *p |
| |
| movw %si, %ax // old value -> return register |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed16 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed32 |
| // |
| // kmp_int32 |
| // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); |
| // |
| // Atomically swaps the 32-bit value at *p with "d" and returns the old one. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %esi (overwritten; caller-saved) |
| // |
| // return: %eax |
| .text |
| PROC __kmp_xchg_fixed32 |
| |
| lock |
| xchgl %esi, (%rdi) // %esi <-> *p |
| |
| movl %esi, %eax // old value -> return register |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed32 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_fixed64 |
| // |
| // kmp_int64 |
| // __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); |
| // |
| // Atomically swaps the 64-bit value at *p with "d" and returns the old one. |
| // |
| // parameters: |
| // p: %rdi |
| // d: %rsi (overwritten; caller-saved) |
| // |
| // return: %rax |
| .text |
| PROC __kmp_xchg_fixed64 |
| |
| lock |
| xchgq %rsi, (%rdi) // %rsi <-> *p |
| |
| movq %rsi, %rax // old value -> return register |
| ret |
| |
| DEBUG_INFO __kmp_xchg_fixed64 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store8 |
| // |
| // kmp_int8 |
| // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %sil |
| // sv: %dl |
| // |
| // return: %rax (0 or 1, upper bits cleared) |
| .text |
| PROC __kmp_compare_and_store8 |
| |
| movb %sil, %al // "cv" -> comparand register |
| lock |
| cmpxchgb %dl, (%rdi) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result through all of %rax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store8 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store16 |
| // |
| // kmp_int16 |
| // __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %si |
| // sv: %dx |
| // |
| // return: %rax (0 or 1, upper bits cleared) |
| .text |
| PROC __kmp_compare_and_store16 |
| |
| movw %si, %ax // "cv" -> comparand register |
| lock |
| cmpxchgw %dx, (%rdi) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result through all of %rax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store16 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store32 |
| // |
| // kmp_int32 |
| // __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %esi |
| // sv: %edx |
| // |
| // return: %rax (0 or 1, upper bits cleared) |
| .text |
| PROC __kmp_compare_and_store32 |
| |
| movl %esi, %eax // "cv" -> comparand register |
| lock |
| cmpxchgl %edx, (%rdi) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result through all of %rax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store32 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store64 |
| // |
| // kmp_int32 |
| // __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); |
| // |
| // If *p == cv, atomically stores sv into *p. Returns 1 when the store |
| // happened and 0 otherwise. |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %rsi |
| // sv: %rdx |
| // |
| // return: %rax (0 or 1, upper bits cleared) |
| .text |
| PROC __kmp_compare_and_store64 |
| |
| movq %rsi, %rax // "cv" -> comparand register |
| lock |
| cmpxchgq %rdx, (%rdi) // ZF = 1 iff *p == cv, and then *p <- sv |
| sete %al // %al = ZF |
| movzbl %al, %eax // zero-extend the 0/1 result through all of %rax |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store64 |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store_ret8 |
| // |
| // kmp_int8 |
| // __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %sil |
| // sv: %dl |
| // |
| // return: %al (value *p held before the operation) |
| .text |
| PROC __kmp_compare_and_store_ret8 |
| |
| movb %sil, %al // "cv" |
| lock |
| cmpxchgb %dl,(%rdi) |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret8 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store_ret16 |
| // |
| // kmp_int16 |
| // __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %si |
| // sv: %dx |
| // |
| // return: %ax (value *p held before the operation) |
| .text |
| PROC __kmp_compare_and_store_ret16 |
| |
| movw %si, %ax // "cv" |
| lock |
| cmpxchgw %dx,(%rdi) |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret16 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store_ret32 |
| // |
| // kmp_int32 |
| // __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %esi |
| // sv: %edx |
| // |
| // return: %eax (value *p held before the operation) |
| .text |
| PROC __kmp_compare_and_store_ret32 |
| |
| movl %esi, %eax // "cv" |
| lock |
| cmpxchgl %edx,(%rdi) |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret32 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_compare_and_store_ret64 |
| // |
| // kmp_int64 |
| // __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); |
| // |
| // parameters: |
| // p: %rdi |
| // cv: %rsi |
| // sv: %rdx |
| // return: %rax (value *p held before the operation) |
| .text |
| PROC __kmp_compare_and_store_ret64 |
| |
| movq %rsi, %rax // "cv" |
| lock |
| cmpxchgq %rdx,(%rdi) |
| ret |
| |
| DEBUG_INFO __kmp_compare_and_store_ret64 |
| |
| # endif /* !KMP_ASM_INTRINS */ |
| |
| |
| # if !KMP_MIC |
| |
| # if !KMP_ASM_INTRINS |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_real32 |
| // |
| // kmp_real32 |
| // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); |
| // |
| // Swaps the 32-bit pattern at *addr with "data" and returns the old one. |
| // The value is moved through %eax so the exchange is done on integer bits. |
| // |
| // parameters: |
| // addr: %rdi |
| // data: %xmm0 (lower 4 bytes) |
| // |
| // return: %xmm0 (lower 4 bytes) |
| .text |
| PROC __kmp_xchg_real32 |
| |
| movd %xmm0, %eax // load "data" to eax |
| |
| lock |
| xchgl %eax, (%rdi) |
| |
| movd %eax, %xmm0 // load old value into return register |
| |
| ret |
| |
| DEBUG_INFO __kmp_xchg_real32 |
| |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_xchg_real64 |
| // |
| // kmp_real64 |
| // __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); |
| // |
| // Swaps the 64-bit pattern at *addr with "data" and returns the old one. |
| // The value is moved through %rax so the exchange is done on integer bits. |
| // |
| // parameters: |
| // addr: %rdi |
| // data: %xmm0 (lower 8 bytes) |
| // return: %xmm0 (lower 8 bytes) |
| .text |
| PROC __kmp_xchg_real64 |
| |
| movd %xmm0, %rax // load "data" to rax |
| |
| lock |
| xchgq %rax, (%rdi) |
| |
| movd %rax, %xmm0 // load old value into return register |
| ret |
| |
| DEBUG_INFO __kmp_xchg_real64 |
| |
| |
| # endif /* !KMP_ASM_INTRINS */ |
| |
| # endif /* !KMP_MIC */ |
| |
| //------------------------------------------------------------------------ |
| // int |
| // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), |
| // int gtid, int tid, |
| // int argc, void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)( & gtid, & tid, argv[0], ... ); |
| // return 1; |
| // } |
| // |
| // note: at call to pkfn must have %rsp 128-byte aligned for compiler |
| // |
| // parameters: |
| // %rdi: pkfn |
| // %esi: gtid |
| // %edx: tid |
| // %ecx: argc |
| // %r8: p_argv |
| // %r9: &exit_frame |
| // |
| // locals: |
| // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn |
| // __tid: tid parm pushed on stack so can pass &tid to pkfn |
| // |
| // reg temps: |
| // %rax: used all over the place |
| // %rdx: used in stack pointer alignment calculation |
| // %r11: used to traverse p_argv array |
| // %rsi: used as temporary for stack parameters |
| // used as temporary for number of pkfn parms to push |
| // %rbx: used to hold pkfn address, and zero constant, callee-save |
| // |
| // return: %eax (always 1/TRUE) |
| // %rbp-relative offsets of the gtid/tid copies pushed in the prologue below |
| // (saved %rbx sits at -8, gtid at -16, tid at -24). |
| __gtid = -16 |
| __tid = -24 |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| .text |
| PROC __kmp_invoke_microtask |
| |
| pushq %rbp // save base pointer |
| KMP_CFI_DEF_OFFSET 16 |
| KMP_CFI_OFFSET rbp,-16 |
| movq %rsp,%rbp // establish the base pointer for this routine. |
| KMP_CFI_REGISTER rbp |
| |
| #if OMPT_SUPPORT |
| movq %rbp, (%r9) // save exit_frame |
| #endif |
| |
| pushq %rbx // %rbx is callee-saved register |
| pushq %rsi // Put gtid on stack so can pass &gtid to pkfn |
| pushq %rdx // Put tid on stack so can pass &tid to pkfn |
| |
| movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax |
| movq $0, %rbx // constant for cmovs later |
| subq $4, %rax // subtract four args passed in registers to pkfn |
| #if KMP_MIC |
| js KMP_LABEL(kmp_0) // jump to movq |
| jmp KMP_LABEL(kmp_0_exit) // jump ahead |
| KMP_LABEL(kmp_0): |
| movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) |
| KMP_LABEL(kmp_0_exit): |
| #else |
| cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) |
| #endif // KMP_MIC |
| |
| movq %rax, %rsi // save max(0, argc-4) -> %rsi for later |
| shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 |
| |
| movq %rsp, %rdx // |
| subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- |
| // without align, stack ptr would be this |
| movq %rdx, %rax // Save to %rax |
| |
| andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) |
| subq %rax, %rdx // Amount to subtract from %rsp |
| subq %rdx, %rsp // Prepare the stack ptr -- |
| // now %rsp will align to 128-byte boundary at call site |
| |
| // setup pkfn parameter reg and stack |
| movq %rcx, %rax // argc -> %rax |
| cmpq $0, %rsi |
| je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push |
| shlq $3, %rcx // argc*8 -> %rcx |
| movq %r8, %rdx // p_argv -> %rdx |
| addq %rcx, %rdx // &p_argv[argc] -> %rdx |
| |
| movq %rsi, %rcx // max (0, argc-4) -> %rcx |
| |
| KMP_LABEL(kmp_invoke_push_parms): |
| // push the last through the 7th parms to pkfn on the stack |
| subq $8, %rdx // decrement p_argv pointer to previous parm |
| movq (%rdx), %rsi // next p_argv[] entry, walking backwards -> %rsi |
| pushq %rsi // push it onto the stack (reverse order) |
| subl $1, %ecx |
| |
| // C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e |
| // if the name of the label that is an operand of this jecxz starts with a dot ("."); |
| // Apple's linker does not support 1-byte length relocation; |
| // Resolution: replace all .labelX entries with L_labelX. |
| |
| jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left |
| jmp KMP_LABEL(kmp_invoke_push_parms) |
| ALIGN 3 |
| KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. |
| // order here is important to avoid trashing |
| // registers used for both input and output parms! |
| movq %rdi, %rbx // pkfn -> %rbx |
| leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn) |
| leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) |
| |
| movq %r8, %r11 // p_argv -> %r11 |
| |
| #if KMP_MIC |
| cmpq $4, %rax // argc >= 4? |
| jns KMP_LABEL(kmp_4) // jump to movq |
| jmp KMP_LABEL(kmp_4_exit) // jump ahead |
| KMP_LABEL(kmp_4): |
| movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) |
| KMP_LABEL(kmp_4_exit): |
| |
| cmpq $3, %rax // argc >= 3? |
| jns KMP_LABEL(kmp_3) // jump to movq |
| jmp KMP_LABEL(kmp_3_exit) // jump ahead |
| KMP_LABEL(kmp_3): |
| movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) |
| KMP_LABEL(kmp_3_exit): |
| |
| cmpq $2, %rax // argc >= 2? |
| jns KMP_LABEL(kmp_2) // jump to movq |
| jmp KMP_LABEL(kmp_2_exit) // jump ahead |
| KMP_LABEL(kmp_2): |
| movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) |
| KMP_LABEL(kmp_2_exit): |
| |
| cmpq $1, %rax // argc >= 1? |
| jns KMP_LABEL(kmp_1) // jump to movq |
| jmp KMP_LABEL(kmp_1_exit) // jump ahead |
| KMP_LABEL(kmp_1): |
| movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) |
| KMP_LABEL(kmp_1_exit): |
| #else |
| // NOTE(review): cmovcc with a memory source is documented to perform the |
| // load even when the condition is false, so p_argv[0..3] may be read for |
| // argc < 4 -- confirm callers always provide readable storage there. |
| cmpq $4, %rax // argc >= 4? |
| cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) |
| |
| cmpq $3, %rax // argc >= 3? |
| cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) |
| |
| cmpq $2, %rax // argc >= 2? |
| cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) |
| |
| cmpq $1, %rax // argc >= 1? |
| cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) |
| #endif // KMP_MIC |
| |
| call *%rbx // call (*pkfn)(); |
| movq $1, %rax // move 1 into return register; |
| |
| movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified |
| movq %rbp, %rsp // restore stack pointer |
| popq %rbp // restore frame pointer |
| KMP_CFI_DEF rsp,8 |
| ret |
| |
| DEBUG_INFO __kmp_invoke_microtask |
| // -- End __kmp_invoke_microtask |
| |
| // kmp_uint64 |
| // __kmp_hardware_timestamp(void) |
| // |
| // Returns the time-stamp counter as one 64-bit value in %rax: rdtsc delivers |
| // it split across %edx (high half) and %eax (low half), and the two halves |
| // are merged below. |
| .text |
| PROC __kmp_hardware_timestamp |
| rdtsc |
| shlq $32, %rdx |
| orq %rdx, %rax |
| ret |
| |
| DEBUG_INFO __kmp_hardware_timestamp |
| // -- End __kmp_hardware_timestamp |
| |
| //------------------------------------------------------------------------ |
| // FUNCTION __kmp_bsr32 |
| // |
| // int |
| // __kmp_bsr32( int ); |
| // |
| // Returns (in %eax) the result of a bit-scan-reverse on the argument in %edi, |
| // i.e. the index of its most significant set bit. |
| // NOTE(review): bsr is documented to leave its destination undefined when the |
| // source is zero -- callers presumably never pass 0; confirm. |
| .text |
| PROC __kmp_bsr32 |
| |
| bsr %edi,%eax |
| ret |
| |
| DEBUG_INFO __kmp_bsr32 |
| |
| // ----------------------------------------------------------------------- |
| #endif /* KMP_ARCH_X86_64 */ |
| |
| // ' |
| #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 |
| |
| //------------------------------------------------------------------------ |
| // int |
| // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), |
| // int gtid, int tid, |
| // int argc, void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)( & gtid, & tid, argv[0], ... ); |
| // |
| // // FIXME: This is done at call-site and can be removed here. |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = 0; |
| // #endif |
| // |
| // return 1; |
| // } |
| // |
| // parameters: |
| // x0: pkfn |
| // w1: gtid |
| // w2: tid |
| // w3: argc |
| // x4: p_argv |
| // x5: &exit_frame |
| // |
| // locals: |
| // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn |
| // __tid: tid parm pushed on stack so can pass &tid to pkfn |
| // |
| // reg temps: |
| // x8: used to hold pkfn address |
| // w9: used as temporary for number of pkfn parms |
| // x10: used to traverse p_argv array |
| // x11: used as temporary for stack placement calculation |
| // x12: used as temporary for stack parameters |
| // x19: used to preserve exit_frame_ptr, callee-save |
| // |
| // return: w0 (always 1/TRUE) |
| // |
| |
| // Byte distances below the frame pointer (x29) of the stack copies of gtid |
| // and tid; they are applied as [x29, #-__gtid] and [x29, #-__tid]. |
| __gtid = 4 |
| __tid = 8 |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| .text |
| PROC __kmp_invoke_microtask |
| |
| // Prologue: push the frame pointer and the link register. |
| stp x29, x30, [sp, #-16]! |
| # if OMPT_SUPPORT |
| // x19 is used below to keep exit_frame_ptr across the call, so it is saved |
| // here; x20 is pushed with it (presumably only to keep sp 16-byte aligned |
| // -- TODO confirm). |
| stp x19, x20, [sp, #-16]! |
| # endif |
| mov x29, sp |
| |
| // Reserve 16 * (1 + argc / 2) bytes below x29. The gtid/tid copies occupy |
| // the top 8 bytes of this area; x11 starts at the new sp and is the write |
| // cursor for arguments that do not fit in registers. |
| orr w9, wzr, #1 |
| add w9, w9, w3, lsr #1 |
| sub sp, sp, w9, uxtw #4 |
| mov x11, sp |
| |
| // Move the incoming arguments out of the registers that are about to be |
| // reloaded with the pkfn arguments: x8 = pkfn, w9 = argc, x10 = p_argv, |
| // and gtid/tid go to their stack slots. |
| mov x8, x0 |
| str w1, [x29, #-__gtid] |
| str w2, [x29, #-__tid] |
| mov w9, w3 |
| mov x10, x4 |
| # if OMPT_SUPPORT |
| // Keep exit_frame_ptr in x19 and store this frame address (x29) through it. |
| mov x19, x5 |
| str x29, [x19] |
| # endif |
| |
| // First two pkfn arguments: the addresses of the gtid and tid copies. |
| sub x0, x29, #__gtid |
| sub x1, x29, #__tid |
| |
| // Load p_argv[0..5] into x2..x7, branching to the call as soon as the |
| // remaining count in w9 reaches zero. Every load after the first is |
| // pre-indexed and advances x10 by 8. |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x2, [x10] |
| |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x3, [x10, #8]! |
| |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x4, [x10, #8]! |
| |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x5, [x10, #8]! |
| |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x6, [x10, #8]! |
| |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x7, [x10, #8]! |
| |
| // More than six p_argv entries: copy the remaining ones to the reserved |
| // stack area, advancing x10 (source) and x11 (destination) by 8 per entry. |
| KMP_LABEL(kmp_0): |
| sub w9, w9, #1 |
| cbz w9, KMP_LABEL(kmp_1) |
| ldr x12, [x10, #8]! |
| str x12, [x11], #8 |
| b KMP_LABEL(kmp_0) |
| // All arguments are in place: call pkfn through x8. |
| KMP_LABEL(kmp_1): |
| blr x8 |
| // The return value is always 1. |
| orr w0, wzr, #1 |
| // Drop the dynamically sized area by resetting sp to the frame pointer. |
| mov sp, x29 |
| # if OMPT_SUPPORT |
| // Store zero through exit_frame_ptr, then restore x19 and x20. |
| str xzr, [x19] |
| ldp x19, x20, [sp], #16 |
| # endif |
| // Epilogue: restore the frame pointer and the link register. |
| ldp x29, x30, [sp], #16 |
| ret |
| |
| DEBUG_INFO __kmp_invoke_microtask |
| // -- End __kmp_invoke_microtask |
| |
| #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ |
| |
| #if KMP_ARCH_PPC64 |
| |
| //------------------------------------------------------------------------ |
| // int |
| // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), |
| // int gtid, int tid, |
| // int argc, void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)( & gtid, & tid, argv[0], ... ); |
| // |
| // // FIXME: This is done at call-site and can be removed here. |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = 0; |
| // #endif |
| // |
| // return 1; |
| // } |
| // |
| // parameters: |
| // r3: pkfn |
| // r4: gtid |
| // r5: tid |
| // r6: argc |
| // r7: p_argv |
| // r8: &exit_frame |
| // |
| // return: r3 (always 1/TRUE) |
| // |
| .text |
| # if KMP_ARCH_PPC64_ELFv2 |
| .abiversion 2 |
| # endif |
| .globl __kmp_invoke_microtask |
| |
| # if KMP_ARCH_PPC64_ELFv2 |
| .p2align 4 |
| # else |
| .p2align 2 |
| # endif |
| |
| .type __kmp_invoke_microtask,@function |
| |
| // ELFv2: the symbol is the global entry point, which derives the TOC pointer |
| // (r2) from the entry address in r12; .localentry marks the local entry |
| // point that follows those two instructions. |
| // ELFv1: the symbol names a function descriptor in .opd made of three |
| // doublewords (code address, TOC base, 0); the code starts at .Lfunc_begin0. |
| # if KMP_ARCH_PPC64_ELFv2 |
| __kmp_invoke_microtask: |
| .Lfunc_begin0: |
| .Lfunc_gep0: |
| addis 2, 12, .TOC.-.Lfunc_gep0@ha |
| addi 2, 2, .TOC.-.Lfunc_gep0@l |
| .Lfunc_lep0: |
| .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 |
| # else |
| .section .opd,"aw",@progbits |
| __kmp_invoke_microtask: |
| .p2align 3 |
| .quad .Lfunc_begin0 |
| .quad .TOC.@tocbase |
| .quad 0 |
| .text |
| .Lfunc_begin0: |
| # endif |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| |
| // We need to allocate a stack frame large enough to hold all of the parameters |
| // on the stack for the microtask plus what this function needs. That's 48 |
| // bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the |
| // parameters to the microtask, plus 8 bytes to store the values of r4 and r5, |
| // and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes |
| // to save r30 to hold a copy of r8. |
| |
| .cfi_startproc |
| // Save r31 just below the incoming stack pointer and the link register |
| // (copied through r0) at offset 16 from it. |
| mflr 0 |
| std 31, -8(1) |
| std 0, 16(1) |
| |
| // This is unusual because normally we'd set r31 equal to r1 after the stack |
| // frame is established. In this case, however, we need to dynamically compute |
| // the stack frame size, and so we keep a direct copy of r1 to access our |
| // register save areas and restore the r1 value before returning. |
| mr 31, 1 |
| .cfi_def_cfa_register r31 |
| .cfi_offset r31, -8 |
| .cfi_offset lr, 16 |
| |
| // Compute the size necessary for the local stack frame. |
| // r12 = -(fixed part + 8 * argc), where the fixed part is 72 bytes for |
| // ELFv2 and 88 bytes for ELFv1. |
| # if KMP_ARCH_PPC64_ELFv2 |
| li 12, 72 |
| # else |
| li 12, 88 |
| # endif |
| sldi 0, 6, 3 |
| add 12, 0, 12 |
| neg 12, 12 |
| |
| // We need to make sure that the stack frame stays aligned (to 16 bytes). |
| // Masking the negative size with -16 can only make the frame larger. |
| li 0, -16 |
| and 12, 0, 12 |
| |
| // Establish the local stack frame. |
| // stdux stores the old r1 at the new frame base and updates r1 to it. |
| stdux 1, 1, 12 |
| |
| # if OMPT_SUPPORT |
| // Save r30, store the new stack pointer (r1) through exit_frame_ptr (r8), |
| // and keep exit_frame_ptr in r30 for use after the call. |
| .cfi_offset r30, -16 |
| std 30, -16(31) |
| std 1, 0(8) |
| mr 30, 8 |
| # endif |
| |
| // Store gtid and tid to the stack because they're passed by reference to the microtask. |
| stw 4, -20(31) |
| stw 5, -24(31) |
| |
| // r12 = argc, r4 = p_argv. The compare/branch ladder below loads |
| // p_argv[0..5] into r5..r10 and branches to .Lcall as soon as argc is |
| // exhausted. |
| mr 12, 6 |
| mr 4, 7 |
| |
| cmpwi 0, 12, 1 |
| blt 0, .Lcall |
| |
| ld 5, 0(4) |
| |
| cmpwi 0, 12, 2 |
| blt 0, .Lcall |
| |
| ld 6, 8(4) |
| |
| cmpwi 0, 12, 3 |
| blt 0, .Lcall |
| |
| ld 7, 16(4) |
| |
| cmpwi 0, 12, 4 |
| blt 0, .Lcall |
| |
| ld 8, 24(4) |
| |
| cmpwi 0, 12, 5 |
| blt 0, .Lcall |
| |
| ld 9, 32(4) |
| |
| cmpwi 0, 12, 6 |
| blt 0, .Lcall |
| |
| ld 10, 40(4) |
| |
| cmpwi 0, 12, 7 |
| blt 0, .Lcall |
| |
| // There are more than 6 microtask parameters, so we need to store the |
| // remainder to the stack. |
| // The count register is set to argc - 6, the number of entries to copy. |
| addi 12, 12, -6 |
| mtctr 12 |
| |
| // These are set to 8 bytes before the first desired store address (we're using |
| // pre-increment loads and stores in the loop below). The parameter save area |
| // for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and |
| // 32 + 8*8 == 96 bytes above r1 for ELFv2. |
| addi 4, 4, 40 |
| # if KMP_ARCH_PPC64_ELFv2 |
| addi 12, 1, 88 |
| # else |
| addi 12, 1, 104 |
| # endif |
| |
| // Copy one p_argv entry per iteration from r4 (source) to r12 (destination) |
| // until the count register reaches zero. |
| .Lnext: |
| ldu 0, 8(4) |
| stdu 0, 8(12) |
| bdnz .Lnext |
| |
| // Save the TOC pointer (r2) in this frame and put the address to call in |
| // r12. For ELFv2, pkfn (r3) is used as the code address directly. |
| .Lcall: |
| # if KMP_ARCH_PPC64_ELFv2 |
| std 2, 24(1) |
| mr 12, 3 |
| #else |
| std 2, 40(1) |
| // For ELFv1, we need to load the actual function address from the function descriptor. |
| ld 12, 0(3) |
| ld 2, 8(3) |
| ld 11, 16(3) |
| #endif |
| |
| // First two microtask arguments: the addresses of the gtid and tid copies. |
| addi 3, 31, -20 |
| addi 4, 31, -24 |
| |
| // Indirect call through the count register, then reload the saved TOC |
| // pointer. |
| mtctr 12 |
| bctrl |
| # if KMP_ARCH_PPC64_ELFv2 |
| ld 2, 24(1) |
| # else |
| ld 2, 40(1) |
| # endif |
| |
| # if OMPT_SUPPORT |
| // Store zero through the exit_frame_ptr copy kept in r30. |
| li 3, 0 |
| std 3, 0(30) |
| # endif |
| |
| // The return value is always 1. |
| li 3, 1 |
| |
| # if OMPT_SUPPORT |
| ld 30, -16(31) |
| # endif |
| |
| // Epilogue: restore r1 from the copy in r31, then reload the link register |
| // and r31 from the slots written on entry. |
| mr 1, 31 |
| ld 0, 16(1) |
| ld 31, -8(1) |
| mtlr 0 |
| blr |
| |
| // NOTE(review): 12 zero bytes follow the return; presumably an empty |
| // traceback marker -- confirm. |
| .long 0 |
| .quad 0 |
| .Lfunc_end0: |
| .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 |
| .cfi_endproc |
| |
| // -- End __kmp_invoke_microtask |
| |
| #endif /* KMP_ARCH_PPC64 */ |
| |
| #if KMP_ARCH_RISCV64 |
| |
| //------------------------------------------------------------------------ |
| // |
| // typedef void (*microtask_t)(int *gtid, int *tid, ...); |
| // |
| // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, |
| // void *p_argv[] |
| // #if OMPT_SUPPORT |
| // , |
| // void **exit_frame_ptr |
| // #endif |
| // ) { |
| // #if OMPT_SUPPORT |
| // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| // #endif |
| // |
| // (*pkfn)(&gtid, &tid, argv[0], ...); |
| // |
| // return 1; |
| // } |
| // |
| // Parameters: |
| // a0: pkfn |
| // a1: gtid |
| // a2: tid |
| // a3: argc |
| // a4: p_argv |
| // a5: exit_frame_ptr |
| // |
| // Locals: |
| // __gtid: gtid param pushed on stack so can pass &gtid to pkfn |
| // __tid: tid param pushed on stack so can pass &tid to pkfn |
| // |
| // Temp. registers: |
| // |
| // t0: used to calculate the dynamic stack size / used to hold pkfn address |
| // t1: used as temporary for stack placement calculation |
| // t2: used as temporary for stack arguments |
| // t3: used as temporary for number of remaining pkfn parms |
| // t4: used to traverse p_argv array |
| // |
| // return: a0 (always 1/TRUE) |
| // |
| |
| // Signed offsets from fp of the stack copies of gtid and tid; they are used |
| // as __gtid(fp) and __tid(fp) and sit just below the saved ra/fp pair. |
| __gtid = -20 |
| __tid = -24 |
| |
| // -- Begin __kmp_invoke_microtask |
| // mark_begin; |
| .text |
| .globl __kmp_invoke_microtask |
| .p2align 1 |
| .type __kmp_invoke_microtask,@function |
| __kmp_invoke_microtask: |
| .cfi_startproc |
| |
| // First, save ra and fp |
| // fp is then set to the value sp had on entry. |
| addi sp, sp, -16 |
| sd ra, 8(sp) |
| sd fp, 0(sp) |
| addi fp, sp, 16 |
| .cfi_def_cfa fp, 0 |
| .cfi_offset ra, -8 |
| .cfi_offset fp, -16 |
| |
| // Compute the dynamic stack size: |
| // |
| // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by |
| // reference |
| // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' |
| // function by register. Given that we have 8 of such registers (a[0-7]) |
| // and two + 'argc' arguments (consider &gtid and &tid), we need to |
| // reserve max(0, argc - 6)*8 extra bytes |
| // |
| // The total number of bytes is then max(0, argc - 6)*8 + 8 |
| |
| // Compute max(0, argc - 6) using the following bithack: |
| // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 |
| // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax |
| addi t0, a3, -6 |
| srai t1, t0, 31 |
| and t1, t0, t1 |
| sub t0, t0, t1 |
| |
| // Add the one 8-byte slot shared by gtid and tid. |
| addi t0, t0, 1 |
| |
| // Convert the slot count to bytes and move sp down by that amount. |
| slli t0, t0, 3 |
| sub sp, sp, t0 |
| |
| // Align the stack to 16 bytes |
| andi sp, sp, -16 |
| |
| // Free a0, a3 and a4 for the pkfn arguments: t0 = pkfn, t3 = argc, |
| // t4 = p_argv. |
| mv t0, a0 |
| mv t3, a3 |
| mv t4, a4 |
| |
| #if OMPT_SUPPORT |
| // Save frame pointer into exit_frame |
| sd fp, 0(a5) |
| #endif |
| |
| // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) |
| |
| // gtid and tid are stored on the stack and passed by address in a0 and a1. |
| sw a1, __gtid(fp) |
| sw a2, __tid(fp) |
| |
| addi a0, fp, __gtid |
| addi a1, fp, __tid |
| |
| // Load p_argv[0..5] into a2..a7, branching to the call as soon as the |
| // remaining count in t3 reaches zero. |
| beqz t3, .L_kmp_3 |
| ld a2, 0(t4) |
| |
| addi t3, t3, -1 |
| beqz t3, .L_kmp_3 |
| ld a3, 8(t4) |
| |
| addi t3, t3, -1 |
| beqz t3, .L_kmp_3 |
| ld a4, 16(t4) |
| |
| addi t3, t3, -1 |
| beqz t3, .L_kmp_3 |
| ld a5, 24(t4) |
| |
| addi t3, t3, -1 |
| beqz t3, .L_kmp_3 |
| ld a6, 32(t4) |
| |
| addi t3, t3, -1 |
| beqz t3, .L_kmp_3 |
| ld a7, 40(t4) |
| |
| // Prepare any additional argument passed through the stack |
| // t4 is advanced to p_argv[6] and t1 starts at sp; the loop is entered at |
| // its test (.L_kmp_2) and copies one entry per iteration while t3 is |
| // nonzero after the decrement. |
| addi t4, t4, 48 |
| mv t1, sp |
| j .L_kmp_2 |
| .L_kmp_1: |
| ld t2, 0(t4) |
| sd t2, 0(t1) |
| addi t4, t4, 8 |
| addi t1, t1, 8 |
| .L_kmp_2: |
| addi t3, t3, -1 |
| bnez t3, .L_kmp_1 |
| |
| .L_kmp_3: |
| // Call pkfn function |
| jalr t0 |
| |
| // Restore stack and return |
| |
| // The return value is always 1. |
| addi a0, zero, 1 |
| |
| // Point sp at the saved fp/ra pair, reload both, and release the 16 bytes |
| // taken on entry. |
| addi sp, fp, -16 |
| ld fp, 0(sp) |
| ld ra, 8(sp) |
| addi sp, sp, 16 |
| ret |
| .Lfunc_end0: |
| .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask |
| .cfi_endproc |
| |
| // -- End __kmp_invoke_microtask |
| |
| #endif /* KMP_ARCH_RISCV64 */ |
| |
| #if KMP_ARCH_ARM || KMP_ARCH_MIPS |
| /* Declare the common symbol .gomp_critical_user_ (size 32, alignment |
| argument 8) and define the global __kmp_unnamed_critical_addr as a 4-byte |
| word holding the address of that symbol. */ |
| .data |
| .comm .gomp_critical_user_,32,8 |
| .data |
| .align 4 |
| .global __kmp_unnamed_critical_addr |
| __kmp_unnamed_critical_addr: |
| .4byte .gomp_critical_user_ |
| .size __kmp_unnamed_critical_addr,4 |
| #endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */ |
| |
| #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 |
| /* Fall back to an identity mapping when no symbol-prefix macro has been |
| defined earlier in the file. */ |
| #ifndef KMP_PREFIX_UNDERSCORE |
| # define KMP_PREFIX_UNDERSCORE(x) x |
| #endif |
| /* Declare the common symbol .gomp_critical_user_ (size 32, alignment |
| argument 8) and define the global __kmp_unnamed_critical_addr as an 8-byte |
| word holding the address of that symbol. The .size directive is emitted |
| only when __ELF__ is defined. */ |
| .data |
| .comm .gomp_critical_user_,32,8 |
| .data |
| .align 8 |
| .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) |
| KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): |
| .8byte .gomp_critical_user_ |
| #ifdef __ELF__ |
| .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 |
| #endif |
| #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || |
| KMP_ARCH_RISCV64 */ |
| |
| /* On Linux, emit an empty .note.GNU-stack section with no flags; this is |
| the conventional way to tell the GNU toolchain that the object does not |
| need an executable stack. The ARM branch spells the section type as |
| %progbits instead of @progbits (presumably because the ARM assembler |
| treats the at-sign as a comment character -- TODO confirm). */ |
| #if KMP_OS_LINUX |
| # if KMP_ARCH_ARM |
| .section .note.GNU-stack,"",%progbits |
| # else |
| .section .note.GNU-stack,"",@progbits |
| # endif |
| #endif |