| /* |
| * kmp_atomic.c -- ATOMIC implementation routines |
| * $Revision: 43421 $ |
| * $Date: 2014-08-28 08:56:10 -0500 (Thu, 28 Aug 2014) $ |
| */ |
| |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is dual licensed under the MIT and the University of Illinois Open |
| // Source Licenses. See LICENSE.txt for details. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| |
| #include "kmp_atomic.h" |
| #include "kmp.h" // TRUE, asm routines prototypes |
| |
| typedef unsigned char uchar; |
| typedef unsigned short ushort; |
| |
| /*! |
| @defgroup ATOMIC_OPS Atomic Operations |
| These functions are used for implementing the many different varieties of atomic operations. |
| |
The compiler is at liberty to inline atomic operations that are naturally supported
by the target architecture. For instance, on the IA-32 architecture an atomic update like this can be inlined
| @code |
| static int s = 0; |
| #pragma omp atomic |
| s++; |
| @endcode |
| using the single instruction: `lock; incl s` |
| |
| However the runtime does provide entrypoints for these operations to support compilers that choose |
| not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the |
| increment above.) |
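
Concretely, the call emitted in place of the inlined instruction might look like this
(an illustrative sketch; `loc` and `gtid` denote the `ident_t` source location and
global thread id that the compiler supplies):
@code
__kmpc_atomic_fixed4_add( &loc, gtid, &s, 1 );   // atomically performs s += 1
@endcode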
| |
| The names of the functions are encoded by using the data type name and the operation name, as in these tables. |
| |
| Data Type | Data type encoding |
| -----------|--------------- |
| int8_t | `fixed1` |
| uint8_t | `fixed1u` |
| int16_t | `fixed2` |
| uint16_t | `fixed2u` |
| int32_t | `fixed4` |
| uint32_t | `fixed4u` |
int64_t | `fixed8`
uint64_t | `fixed8u`
| float | `float4` |
| double | `float8` |
long double (x87 80 bit extended float) | `float10`
_Quad (128 bit float) | `float16`
| complex<float> | `cmplx4` |
| complex<double> | `cmplx8` |
complex<float10> | `cmplx10`
complex<_Quad> | `cmplx16`
| <br> |
| |
| Operation | Operation encoding |
| ----------|------------------- |
| + | add |
| - | sub |
| \* | mul |
| / | div |
| & | andb |
| << | shl |
| \>\> | shr |
| \| | orb |
| ^ | xor |
| && | andl |
| \|\| | orl |
| maximum | max |
| minimum | min |
| .eqv. | eqv |
| .neqv. | neqv |
| |
| <br> |
| For non-commutative operations, `_rev` can also be added for the reversed operation. |
| For the functions that capture the result, the suffix `_cpt` is added. |
| |
| Update Functions |
| ================ |
The general form of an atomic function that just performs an update (without a `capture`) is
| @code |
| void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); |
| @endcode |
@param id_ref a pointer to the source location
| @param gtid the global thread id |
| @param lhs a pointer to the left operand |
| @param rhs the right operand |
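
For example, substituting `float8` (i.e. `double`) and `mul` into the general form gives
this entrypoint (shown for illustration):
@code
void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, double * lhs, double rhs ); // atomically performs *lhs *= rhs
@endcode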
| |
| `capture` functions |
| =================== |
The capture functions perform an atomic update and return a result: either the value of the
target before the update, or the value after it. They take an additional argument to determine which result is returned.
| Their general form is therefore |
| @code |
| TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ); |
| @endcode |
@param id_ref a pointer to the source location
| @param gtid the global thread id |
| @param lhs a pointer to the left operand |
| @param rhs the right operand |
| @param flag one if the result is to be captured *after* the operation, zero if captured *before*. |
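
For example, using the `double` addition form (a sketch; `id_ref` and `gtid` are assumed
to be at hand, and `x` starts at 10.0):
@code
double before = __kmpc_atomic_float8_add_cpt( id_ref, gtid, &x, 1.0, 0 ); // x becomes 11.0, returns 10.0
double after  = __kmpc_atomic_float8_add_cpt( id_ref, gtid, &x, 1.0, 1 ); // x becomes 12.0, returns 12.0
@endcode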
| |
The one exception to this is the `complex<float>` type, where the value is not returned;
instead an extra pointer argument is passed, through which the result is written.
| |
| They look like |
| @code |
| void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); |
| @endcode |
| |
| Read and Write Operations |
| ========================= |
| The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the |
| value is read or written atomically, with no modification |
| performed. In many cases on IA-32 architecture these operations can be inlined since |
| the architecture guarantees that no tearing occurs on aligned objects |
| accessed with a single memory operation of up to 64 bits in size. |
| |
| The general form of the read operations is |
| @code |
| TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); |
| @endcode |
| |
| For the write operations the form is |
| @code |
| void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); |
| @endcode |
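
For example, an atomic read and an atomic write of a `double` (a sketch; `id_ref` and
`gtid` assumed available):
@code
double tmp = __kmpc_atomic_float8_rd( id_ref, gtid, &x );  // tmp = x, read atomically
__kmpc_atomic_float8_wr( id_ref, gtid, &x, 0.0 );          // x = 0.0, written atomically
@endcode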
| |
| Full list of functions |
| ====================== |
| This leads to the generation of 376 atomic functions, as follows. |
| |
Functions for integers
| --------------------- |
There are versions here for integers of sizes 1, 2, 4 and 8 bytes, both signed and unsigned (where that matters).
| @code |
| __kmpc_atomic_fixed1_add |
| __kmpc_atomic_fixed1_add_cpt |
| __kmpc_atomic_fixed1_add_fp |
| __kmpc_atomic_fixed1_andb |
| __kmpc_atomic_fixed1_andb_cpt |
| __kmpc_atomic_fixed1_andl |
| __kmpc_atomic_fixed1_andl_cpt |
| __kmpc_atomic_fixed1_div |
| __kmpc_atomic_fixed1_div_cpt |
| __kmpc_atomic_fixed1_div_cpt_rev |
| __kmpc_atomic_fixed1_div_float8 |
| __kmpc_atomic_fixed1_div_fp |
| __kmpc_atomic_fixed1_div_rev |
| __kmpc_atomic_fixed1_eqv |
| __kmpc_atomic_fixed1_eqv_cpt |
| __kmpc_atomic_fixed1_max |
| __kmpc_atomic_fixed1_max_cpt |
| __kmpc_atomic_fixed1_min |
| __kmpc_atomic_fixed1_min_cpt |
| __kmpc_atomic_fixed1_mul |
| __kmpc_atomic_fixed1_mul_cpt |
| __kmpc_atomic_fixed1_mul_float8 |
| __kmpc_atomic_fixed1_mul_fp |
| __kmpc_atomic_fixed1_neqv |
| __kmpc_atomic_fixed1_neqv_cpt |
| __kmpc_atomic_fixed1_orb |
| __kmpc_atomic_fixed1_orb_cpt |
| __kmpc_atomic_fixed1_orl |
| __kmpc_atomic_fixed1_orl_cpt |
| __kmpc_atomic_fixed1_rd |
| __kmpc_atomic_fixed1_shl |
| __kmpc_atomic_fixed1_shl_cpt |
| __kmpc_atomic_fixed1_shl_cpt_rev |
| __kmpc_atomic_fixed1_shl_rev |
| __kmpc_atomic_fixed1_shr |
| __kmpc_atomic_fixed1_shr_cpt |
| __kmpc_atomic_fixed1_shr_cpt_rev |
| __kmpc_atomic_fixed1_shr_rev |
| __kmpc_atomic_fixed1_sub |
| __kmpc_atomic_fixed1_sub_cpt |
| __kmpc_atomic_fixed1_sub_cpt_rev |
| __kmpc_atomic_fixed1_sub_fp |
| __kmpc_atomic_fixed1_sub_rev |
| __kmpc_atomic_fixed1_swp |
| __kmpc_atomic_fixed1_wr |
| __kmpc_atomic_fixed1_xor |
| __kmpc_atomic_fixed1_xor_cpt |
| __kmpc_atomic_fixed1u_div |
| __kmpc_atomic_fixed1u_div_cpt |
| __kmpc_atomic_fixed1u_div_cpt_rev |
| __kmpc_atomic_fixed1u_div_fp |
| __kmpc_atomic_fixed1u_div_rev |
| __kmpc_atomic_fixed1u_shr |
| __kmpc_atomic_fixed1u_shr_cpt |
| __kmpc_atomic_fixed1u_shr_cpt_rev |
| __kmpc_atomic_fixed1u_shr_rev |
| __kmpc_atomic_fixed2_add |
| __kmpc_atomic_fixed2_add_cpt |
| __kmpc_atomic_fixed2_add_fp |
| __kmpc_atomic_fixed2_andb |
| __kmpc_atomic_fixed2_andb_cpt |
| __kmpc_atomic_fixed2_andl |
| __kmpc_atomic_fixed2_andl_cpt |
| __kmpc_atomic_fixed2_div |
| __kmpc_atomic_fixed2_div_cpt |
| __kmpc_atomic_fixed2_div_cpt_rev |
| __kmpc_atomic_fixed2_div_float8 |
| __kmpc_atomic_fixed2_div_fp |
| __kmpc_atomic_fixed2_div_rev |
| __kmpc_atomic_fixed2_eqv |
| __kmpc_atomic_fixed2_eqv_cpt |
| __kmpc_atomic_fixed2_max |
| __kmpc_atomic_fixed2_max_cpt |
| __kmpc_atomic_fixed2_min |
| __kmpc_atomic_fixed2_min_cpt |
| __kmpc_atomic_fixed2_mul |
| __kmpc_atomic_fixed2_mul_cpt |
| __kmpc_atomic_fixed2_mul_float8 |
| __kmpc_atomic_fixed2_mul_fp |
| __kmpc_atomic_fixed2_neqv |
| __kmpc_atomic_fixed2_neqv_cpt |
| __kmpc_atomic_fixed2_orb |
| __kmpc_atomic_fixed2_orb_cpt |
| __kmpc_atomic_fixed2_orl |
| __kmpc_atomic_fixed2_orl_cpt |
| __kmpc_atomic_fixed2_rd |
| __kmpc_atomic_fixed2_shl |
| __kmpc_atomic_fixed2_shl_cpt |
| __kmpc_atomic_fixed2_shl_cpt_rev |
| __kmpc_atomic_fixed2_shl_rev |
| __kmpc_atomic_fixed2_shr |
| __kmpc_atomic_fixed2_shr_cpt |
| __kmpc_atomic_fixed2_shr_cpt_rev |
| __kmpc_atomic_fixed2_shr_rev |
| __kmpc_atomic_fixed2_sub |
| __kmpc_atomic_fixed2_sub_cpt |
| __kmpc_atomic_fixed2_sub_cpt_rev |
| __kmpc_atomic_fixed2_sub_fp |
| __kmpc_atomic_fixed2_sub_rev |
| __kmpc_atomic_fixed2_swp |
| __kmpc_atomic_fixed2_wr |
| __kmpc_atomic_fixed2_xor |
| __kmpc_atomic_fixed2_xor_cpt |
| __kmpc_atomic_fixed2u_div |
| __kmpc_atomic_fixed2u_div_cpt |
| __kmpc_atomic_fixed2u_div_cpt_rev |
| __kmpc_atomic_fixed2u_div_fp |
| __kmpc_atomic_fixed2u_div_rev |
| __kmpc_atomic_fixed2u_shr |
| __kmpc_atomic_fixed2u_shr_cpt |
| __kmpc_atomic_fixed2u_shr_cpt_rev |
| __kmpc_atomic_fixed2u_shr_rev |
| __kmpc_atomic_fixed4_add |
| __kmpc_atomic_fixed4_add_cpt |
| __kmpc_atomic_fixed4_add_fp |
| __kmpc_atomic_fixed4_andb |
| __kmpc_atomic_fixed4_andb_cpt |
| __kmpc_atomic_fixed4_andl |
| __kmpc_atomic_fixed4_andl_cpt |
| __kmpc_atomic_fixed4_div |
| __kmpc_atomic_fixed4_div_cpt |
| __kmpc_atomic_fixed4_div_cpt_rev |
| __kmpc_atomic_fixed4_div_float8 |
| __kmpc_atomic_fixed4_div_fp |
| __kmpc_atomic_fixed4_div_rev |
| __kmpc_atomic_fixed4_eqv |
| __kmpc_atomic_fixed4_eqv_cpt |
| __kmpc_atomic_fixed4_max |
| __kmpc_atomic_fixed4_max_cpt |
| __kmpc_atomic_fixed4_min |
| __kmpc_atomic_fixed4_min_cpt |
| __kmpc_atomic_fixed4_mul |
| __kmpc_atomic_fixed4_mul_cpt |
| __kmpc_atomic_fixed4_mul_float8 |
| __kmpc_atomic_fixed4_mul_fp |
| __kmpc_atomic_fixed4_neqv |
| __kmpc_atomic_fixed4_neqv_cpt |
| __kmpc_atomic_fixed4_orb |
| __kmpc_atomic_fixed4_orb_cpt |
| __kmpc_atomic_fixed4_orl |
| __kmpc_atomic_fixed4_orl_cpt |
| __kmpc_atomic_fixed4_rd |
| __kmpc_atomic_fixed4_shl |
| __kmpc_atomic_fixed4_shl_cpt |
| __kmpc_atomic_fixed4_shl_cpt_rev |
| __kmpc_atomic_fixed4_shl_rev |
| __kmpc_atomic_fixed4_shr |
| __kmpc_atomic_fixed4_shr_cpt |
| __kmpc_atomic_fixed4_shr_cpt_rev |
| __kmpc_atomic_fixed4_shr_rev |
| __kmpc_atomic_fixed4_sub |
| __kmpc_atomic_fixed4_sub_cpt |
| __kmpc_atomic_fixed4_sub_cpt_rev |
| __kmpc_atomic_fixed4_sub_fp |
| __kmpc_atomic_fixed4_sub_rev |
| __kmpc_atomic_fixed4_swp |
| __kmpc_atomic_fixed4_wr |
| __kmpc_atomic_fixed4_xor |
| __kmpc_atomic_fixed4_xor_cpt |
| __kmpc_atomic_fixed4u_div |
| __kmpc_atomic_fixed4u_div_cpt |
| __kmpc_atomic_fixed4u_div_cpt_rev |
| __kmpc_atomic_fixed4u_div_fp |
| __kmpc_atomic_fixed4u_div_rev |
| __kmpc_atomic_fixed4u_shr |
| __kmpc_atomic_fixed4u_shr_cpt |
| __kmpc_atomic_fixed4u_shr_cpt_rev |
| __kmpc_atomic_fixed4u_shr_rev |
| __kmpc_atomic_fixed8_add |
| __kmpc_atomic_fixed8_add_cpt |
| __kmpc_atomic_fixed8_add_fp |
| __kmpc_atomic_fixed8_andb |
| __kmpc_atomic_fixed8_andb_cpt |
| __kmpc_atomic_fixed8_andl |
| __kmpc_atomic_fixed8_andl_cpt |
| __kmpc_atomic_fixed8_div |
| __kmpc_atomic_fixed8_div_cpt |
| __kmpc_atomic_fixed8_div_cpt_rev |
| __kmpc_atomic_fixed8_div_float8 |
| __kmpc_atomic_fixed8_div_fp |
| __kmpc_atomic_fixed8_div_rev |
| __kmpc_atomic_fixed8_eqv |
| __kmpc_atomic_fixed8_eqv_cpt |
| __kmpc_atomic_fixed8_max |
| __kmpc_atomic_fixed8_max_cpt |
| __kmpc_atomic_fixed8_min |
| __kmpc_atomic_fixed8_min_cpt |
| __kmpc_atomic_fixed8_mul |
| __kmpc_atomic_fixed8_mul_cpt |
| __kmpc_atomic_fixed8_mul_float8 |
| __kmpc_atomic_fixed8_mul_fp |
| __kmpc_atomic_fixed8_neqv |
| __kmpc_atomic_fixed8_neqv_cpt |
| __kmpc_atomic_fixed8_orb |
| __kmpc_atomic_fixed8_orb_cpt |
| __kmpc_atomic_fixed8_orl |
| __kmpc_atomic_fixed8_orl_cpt |
| __kmpc_atomic_fixed8_rd |
| __kmpc_atomic_fixed8_shl |
| __kmpc_atomic_fixed8_shl_cpt |
| __kmpc_atomic_fixed8_shl_cpt_rev |
| __kmpc_atomic_fixed8_shl_rev |
| __kmpc_atomic_fixed8_shr |
| __kmpc_atomic_fixed8_shr_cpt |
| __kmpc_atomic_fixed8_shr_cpt_rev |
| __kmpc_atomic_fixed8_shr_rev |
| __kmpc_atomic_fixed8_sub |
| __kmpc_atomic_fixed8_sub_cpt |
| __kmpc_atomic_fixed8_sub_cpt_rev |
| __kmpc_atomic_fixed8_sub_fp |
| __kmpc_atomic_fixed8_sub_rev |
| __kmpc_atomic_fixed8_swp |
| __kmpc_atomic_fixed8_wr |
| __kmpc_atomic_fixed8_xor |
| __kmpc_atomic_fixed8_xor_cpt |
| __kmpc_atomic_fixed8u_div |
| __kmpc_atomic_fixed8u_div_cpt |
| __kmpc_atomic_fixed8u_div_cpt_rev |
| __kmpc_atomic_fixed8u_div_fp |
| __kmpc_atomic_fixed8u_div_rev |
| __kmpc_atomic_fixed8u_shr |
| __kmpc_atomic_fixed8u_shr_cpt |
| __kmpc_atomic_fixed8u_shr_cpt_rev |
| __kmpc_atomic_fixed8u_shr_rev |
| @endcode |
| |
| Functions for floating point |
| ---------------------------- |
There are versions here for floating point numbers of sizes 4, 8, 10 and 16 bytes.
(Ten byte floats are the x87 80-bit extended format, and are now rarely used.)
| @code |
| __kmpc_atomic_float4_add |
| __kmpc_atomic_float4_add_cpt |
| __kmpc_atomic_float4_add_float8 |
| __kmpc_atomic_float4_add_fp |
| __kmpc_atomic_float4_div |
| __kmpc_atomic_float4_div_cpt |
| __kmpc_atomic_float4_div_cpt_rev |
| __kmpc_atomic_float4_div_float8 |
| __kmpc_atomic_float4_div_fp |
| __kmpc_atomic_float4_div_rev |
| __kmpc_atomic_float4_max |
| __kmpc_atomic_float4_max_cpt |
| __kmpc_atomic_float4_min |
| __kmpc_atomic_float4_min_cpt |
| __kmpc_atomic_float4_mul |
| __kmpc_atomic_float4_mul_cpt |
| __kmpc_atomic_float4_mul_float8 |
| __kmpc_atomic_float4_mul_fp |
| __kmpc_atomic_float4_rd |
| __kmpc_atomic_float4_sub |
| __kmpc_atomic_float4_sub_cpt |
| __kmpc_atomic_float4_sub_cpt_rev |
| __kmpc_atomic_float4_sub_float8 |
| __kmpc_atomic_float4_sub_fp |
| __kmpc_atomic_float4_sub_rev |
| __kmpc_atomic_float4_swp |
| __kmpc_atomic_float4_wr |
| __kmpc_atomic_float8_add |
| __kmpc_atomic_float8_add_cpt |
| __kmpc_atomic_float8_add_fp |
| __kmpc_atomic_float8_div |
| __kmpc_atomic_float8_div_cpt |
| __kmpc_atomic_float8_div_cpt_rev |
| __kmpc_atomic_float8_div_fp |
| __kmpc_atomic_float8_div_rev |
| __kmpc_atomic_float8_max |
| __kmpc_atomic_float8_max_cpt |
| __kmpc_atomic_float8_min |
| __kmpc_atomic_float8_min_cpt |
| __kmpc_atomic_float8_mul |
| __kmpc_atomic_float8_mul_cpt |
| __kmpc_atomic_float8_mul_fp |
| __kmpc_atomic_float8_rd |
| __kmpc_atomic_float8_sub |
| __kmpc_atomic_float8_sub_cpt |
| __kmpc_atomic_float8_sub_cpt_rev |
| __kmpc_atomic_float8_sub_fp |
| __kmpc_atomic_float8_sub_rev |
| __kmpc_atomic_float8_swp |
| __kmpc_atomic_float8_wr |
| __kmpc_atomic_float10_add |
| __kmpc_atomic_float10_add_cpt |
| __kmpc_atomic_float10_add_fp |
| __kmpc_atomic_float10_div |
| __kmpc_atomic_float10_div_cpt |
| __kmpc_atomic_float10_div_cpt_rev |
| __kmpc_atomic_float10_div_fp |
| __kmpc_atomic_float10_div_rev |
| __kmpc_atomic_float10_mul |
| __kmpc_atomic_float10_mul_cpt |
| __kmpc_atomic_float10_mul_fp |
| __kmpc_atomic_float10_rd |
| __kmpc_atomic_float10_sub |
| __kmpc_atomic_float10_sub_cpt |
| __kmpc_atomic_float10_sub_cpt_rev |
| __kmpc_atomic_float10_sub_fp |
| __kmpc_atomic_float10_sub_rev |
| __kmpc_atomic_float10_swp |
| __kmpc_atomic_float10_wr |
| __kmpc_atomic_float16_add |
| __kmpc_atomic_float16_add_cpt |
| __kmpc_atomic_float16_div |
| __kmpc_atomic_float16_div_cpt |
| __kmpc_atomic_float16_div_cpt_rev |
| __kmpc_atomic_float16_div_rev |
| __kmpc_atomic_float16_max |
| __kmpc_atomic_float16_max_cpt |
| __kmpc_atomic_float16_min |
| __kmpc_atomic_float16_min_cpt |
| __kmpc_atomic_float16_mul |
| __kmpc_atomic_float16_mul_cpt |
| __kmpc_atomic_float16_rd |
| __kmpc_atomic_float16_sub |
| __kmpc_atomic_float16_sub_cpt |
| __kmpc_atomic_float16_sub_cpt_rev |
| __kmpc_atomic_float16_sub_rev |
| __kmpc_atomic_float16_swp |
| __kmpc_atomic_float16_wr |
| @endcode |
| |
| Functions for Complex types |
| --------------------------- |
Functions for complex types whose component floating point variables are of sizes 4, 8, 10 or 16 bytes.
The names here are based on the size of the component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
| |
| @code |
| __kmpc_atomic_cmplx4_add |
| __kmpc_atomic_cmplx4_add_cmplx8 |
| __kmpc_atomic_cmplx4_add_cpt |
| __kmpc_atomic_cmplx4_div |
| __kmpc_atomic_cmplx4_div_cmplx8 |
| __kmpc_atomic_cmplx4_div_cpt |
| __kmpc_atomic_cmplx4_div_cpt_rev |
| __kmpc_atomic_cmplx4_div_rev |
| __kmpc_atomic_cmplx4_mul |
| __kmpc_atomic_cmplx4_mul_cmplx8 |
| __kmpc_atomic_cmplx4_mul_cpt |
| __kmpc_atomic_cmplx4_rd |
| __kmpc_atomic_cmplx4_sub |
| __kmpc_atomic_cmplx4_sub_cmplx8 |
| __kmpc_atomic_cmplx4_sub_cpt |
| __kmpc_atomic_cmplx4_sub_cpt_rev |
| __kmpc_atomic_cmplx4_sub_rev |
| __kmpc_atomic_cmplx4_swp |
| __kmpc_atomic_cmplx4_wr |
| __kmpc_atomic_cmplx8_add |
| __kmpc_atomic_cmplx8_add_cpt |
| __kmpc_atomic_cmplx8_div |
| __kmpc_atomic_cmplx8_div_cpt |
| __kmpc_atomic_cmplx8_div_cpt_rev |
| __kmpc_atomic_cmplx8_div_rev |
| __kmpc_atomic_cmplx8_mul |
| __kmpc_atomic_cmplx8_mul_cpt |
| __kmpc_atomic_cmplx8_rd |
| __kmpc_atomic_cmplx8_sub |
| __kmpc_atomic_cmplx8_sub_cpt |
| __kmpc_atomic_cmplx8_sub_cpt_rev |
| __kmpc_atomic_cmplx8_sub_rev |
| __kmpc_atomic_cmplx8_swp |
| __kmpc_atomic_cmplx8_wr |
| __kmpc_atomic_cmplx10_add |
| __kmpc_atomic_cmplx10_add_cpt |
| __kmpc_atomic_cmplx10_div |
| __kmpc_atomic_cmplx10_div_cpt |
| __kmpc_atomic_cmplx10_div_cpt_rev |
| __kmpc_atomic_cmplx10_div_rev |
| __kmpc_atomic_cmplx10_mul |
| __kmpc_atomic_cmplx10_mul_cpt |
| __kmpc_atomic_cmplx10_rd |
| __kmpc_atomic_cmplx10_sub |
| __kmpc_atomic_cmplx10_sub_cpt |
| __kmpc_atomic_cmplx10_sub_cpt_rev |
| __kmpc_atomic_cmplx10_sub_rev |
| __kmpc_atomic_cmplx10_swp |
| __kmpc_atomic_cmplx10_wr |
| __kmpc_atomic_cmplx16_add |
| __kmpc_atomic_cmplx16_add_cpt |
| __kmpc_atomic_cmplx16_div |
| __kmpc_atomic_cmplx16_div_cpt |
| __kmpc_atomic_cmplx16_div_cpt_rev |
| __kmpc_atomic_cmplx16_div_rev |
| __kmpc_atomic_cmplx16_mul |
| __kmpc_atomic_cmplx16_mul_cpt |
| __kmpc_atomic_cmplx16_rd |
| __kmpc_atomic_cmplx16_sub |
| __kmpc_atomic_cmplx16_sub_cpt |
| __kmpc_atomic_cmplx16_sub_cpt_rev |
| __kmpc_atomic_cmplx16_swp |
| __kmpc_atomic_cmplx16_wr |
| @endcode |
| */ |
| |
| /*! |
| @ingroup ATOMIC_OPS |
| @{ |
| */ |
| |
| /* |
| * Global vars |
| */ |
| |
| #ifndef KMP_GOMP_COMPAT |
| int __kmp_atomic_mode = 1; // Intel perf |
| #else |
| int __kmp_atomic_mode = 2; // GOMP compatibility |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| KMP_ALIGN(128) |
| |
| kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ |
| kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ |
| kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ |
| kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ |
| kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ |
| kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ |
| kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ |
kmp_atomic_lock_t __kmp_atomic_lock_8c;   /* Control access to all user coded atomics for float complex data type  */
| kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ |
| kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ |
| kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ |
| kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ |
| kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ |
| |
| |
| /* |
| 2007-03-02: |
| Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a |
| bug on *_32 and *_32e. This is just a temporary workaround for the problem. |
| It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG |
| routines in assembler language. |
| */ |
| #define KMP_ATOMIC_VOLATILE volatile |
| |
| #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD |
| |
static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }
static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }
static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }
static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }
static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }

static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }
static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }
static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }
static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }
static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }

static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }
static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }
static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }
static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }

static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }
static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; }
static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }
static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }
| |
| #endif |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ATOMIC implementation routines */ |
| /* one routine for each operation and operand type */ |
| /* ------------------------------------------------------------------------ */ |
| |
// All routine declarations look like
| // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); |
| // ------------------------------------------------------------------------ |
| |
| #define KMP_CHECK_GTID \ |
| if ( gtid == KMP_GTID_UNKNOWN ) { \ |
| gtid = __kmp_entry_gtid(); \ |
| } // check and get gtid when needed |
| |
// Beginning of a definition (provides name, parameters, debug trace)
| // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operands' type |
| #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ |
| RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); |
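
// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens the
// definition of
//     void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid,
//                                    kmp_int32 * lhs, kmp_int32 rhs )
// the body and the closing brace are supplied by the macros that use it.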
| |
| // ------------------------------------------------------------------------ |
| // Lock variables used for critical sections for various size operands |
| #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat |
| #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char |
| #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short |
#define ATOMIC_LOCK4i   __kmp_atomic_lock_4i    // 4-byte ints
#define ATOMIC_LOCK4r   __kmp_atomic_lock_4r    // float
#define ATOMIC_LOCK8i   __kmp_atomic_lock_8i    // 8-byte ints
| #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double |
| #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex |
| #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double |
| #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad |
| #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex |
| #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex |
| #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs bound by critical section |
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
// Note: gtid is not checked here as it should always be valid
// (1- and 2-byte entries receive a valid gtid; other sizes check it before this macro)
| #define OP_CRITICAL(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| (*lhs) OP (rhs); \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); |
| |
| // ------------------------------------------------------------------------ |
| // For GNU compatibility, we may need to use a critical section, |
| // even though it is not required by the ISA. |
| // |
| // On IA-32 architecture, all atomic operations except for fixed 4 byte add, |
| // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common |
| // critical section. On Intel(R) 64, all atomic operations are done with fetch |
| // and add or compare and exchange. Therefore, the FLAG parameter to this |
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
| // require a critical section, where we predict that they will be implemented |
| // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). |
| // |
| // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, |
| // the FLAG parameter should always be 1. If we know that we will be using |
| // a critical section, then we want to make certain that we use the generic |
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
| // locks that are specialized based upon the size or type of the data. |
| // |
| // If FLAG is 0, then we are relying on dead code elimination by the build |
| // compiler to get rid of the useless block of code, and save a needless |
| // branch at runtime. |
| // |
| |
| #ifdef KMP_GOMP_COMPAT |
| # define OP_GOMP_CRITICAL(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL( OP, 0 ); \ |
| return; \ |
| } |
| # else |
| # define OP_GOMP_CRITICAL(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| #if KMP_MIC |
| # define KMP_DO_PAUSE _mm_delay_32( 1 ) |
| #else |
| # define KMP_DO_PAUSE KMP_CPU_PAUSE() |
| #endif /* KMP_MIC */ |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| #define OP_CMPXCHG(TYPE,BITS,OP) \ |
| { \ |
| TYPE old_value, new_value; \ |
| old_value = *(TYPE volatile *)lhs; \ |
| new_value = old_value OP rhs; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_DO_PAUSE; \ |
| \ |
| old_value = *(TYPE volatile *)lhs; \ |
| new_value = old_value OP rhs; \ |
| } \ |
| } |
| |
| #if USE_CMPXCHG_FIX |
| // 2007-06-25: |
| // workaround for C78287 (complex(kind=4) data type) |
| // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) |
| // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro. |
| // This is a problem of the compiler. |
| // Related tracker is C76005, targeted to 11.0. |
| // I verified the asm of the workaround. |
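// Note: the "anonym" array below doubles as a compile-time size check: if
// sizeof(TYPE) != sizeof(kmp_int##BITS) its size is 0, which is ill-formed,
// so a mismatched instantiation fails to compile.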
| #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ |
| { \ |
| char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \ |
| struct _sss { \ |
| TYPE cmp; \ |
| kmp_int##BITS *vvv; \ |
| }; \ |
| struct _sss old_value, new_value; \ |
| old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \ |
| new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \ |
| *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ |
| new_value.cmp = old_value.cmp OP rhs; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ |
| *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \ |
| { \ |
| KMP_DO_PAUSE; \ |
| \ |
| *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ |
| new_value.cmp = old_value.cmp OP rhs; \ |
| } \ |
| } |
| // end of the first part of the workaround for C78287 |
| #endif // USE_CMPXCHG_FIX |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| // ------------------------------------------------------------------------ |
| // X86 or X86_64: no alignment problems ==================================== |
| #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
| KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ |
| } |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG(TYPE,BITS,OP) \ |
| } |
| #if USE_CMPXCHG_FIX |
| // ------------------------------------------------------------------------- |
| // workaround for C78287 (complex(kind=4) data type) |
| #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ |
| } |
| // end of the second part of the workaround for C78287 |
| #endif |
| |
| #else |
| // ------------------------------------------------------------------------- |
| // Code for other architectures that don't handle unaligned accesses. |
| #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
| KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| #if USE_CMPXCHG_FIX |
| // ------------------------------------------------------------------------- |
| // workaround for C78287 (complex(kind=4) data type) |
| #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| // end of the second part of the workaround for C78287 |
| #endif // USE_CMPXCHG_FIX |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| // Routines for ATOMIC 4-byte operands addition and subtraction |
| ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add |
| ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub |
| |
| ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add |
| ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub |
| |
| // Routines for ATOMIC 8-byte operands addition and subtraction |
| ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add |
| ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub |
| |
| ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add |
| ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub |
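
// For example, the ATOMIC_CMPXCHG( float8, add, ... ) line above generates
// __kmpc_atomic_float8_add, which performs *lhs += rhs via a 64-bit
// compare-and-store loop (or, on IA-32 with KMP_GOMP_COMPAT and
// __kmp_atomic_mode == 2, via the generic critical section).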
| |
| // ------------------------------------------------------------------------ |
// Entry definitions for integer operands
| // TYPE_ID - operands type and size (fixed4, float4) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operand type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator (used in critical section) |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| // MASK - used for alignment check |
| |
| // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG |
| // ------------------------------------------------------------------------ |
| // Routines for ATOMIC integer operands, other operators |
| // ------------------------------------------------------------------------ |
//               TYPE_ID,OP_ID, TYPE,       BITS, OP, LCK_ID, MASK, GOMP_FLAG
| ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add |
| ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb |
| ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div |
| ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div |
| ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul |
| ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb |
| ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl |
| ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr |
| ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr |
| ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub |
| ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor |
| ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add |
| ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb |
| ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div |
| ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div |
| ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul |
| ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb |
| ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl |
| ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr |
| ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr |
| ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub |
| ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor |
| ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb |
| ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div |
| ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div |
| ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul |
| ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb |
| ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl |
| ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr |
| ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr |
| ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor |
| ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb |
| ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div |
| ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div |
| ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul |
| ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb |
| ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl |
| ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr |
| ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr |
| ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor |
| ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div |
| ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul |
| ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div |
| ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul |
//               TYPE_ID,OP_ID, TYPE,       BITS, OP, LCK_ID, MASK, GOMP_FLAG
| |
| |
| /* ------------------------------------------------------------------------ */ |
| /* Routines for C/C++ Reduction operators && and || */ |
| /* ------------------------------------------------------------------------ */ |
| |
| // ------------------------------------------------------------------------ |
| // Need separate macros for &&, || because there is no combined assignment |
| // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used |
| #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ |
| OP_CRITICAL( = *lhs OP, LCK_ID ) \ |
| } |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| // ------------------------------------------------------------------------ |
| // X86 or X86_64: no alignment problems =================================== |
| #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ |
| OP_CMPXCHG(TYPE,BITS,OP) \ |
| } |
| |
| #else |
| // ------------------------------------------------------------------------ |
| // Code for other architectures that don't handle unaligned accesses. |
| #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \ |
| } \ |
| } |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl |
| ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl |
| ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl |
| ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl |
| ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl |
| ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl |
| ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl |
| ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl |
| |
| |
| /* ------------------------------------------------------------------------- */ |
/* Routines for Fortran operators that have no counterpart in C:              */
| /* MAX, MIN, .EQV., .NEQV. */ |
| /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ |
| /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ |
| /* ------------------------------------------------------------------------- */ |
| |
| // ------------------------------------------------------------------------- |
| // MIN and MAX need separate macros |
// OP - comparison operator used to check whether any update is still needed
| #define MIN_MAX_CRITSECT(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if ( *lhs OP rhs ) { /* still need actions? */ \ |
| *lhs = rhs; \ |
| } \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); |
| |
| // ------------------------------------------------------------------------- |
| #ifdef KMP_GOMP_COMPAT |
| #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \ |
| if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ |
| KMP_CHECK_GTID; \ |
| MIN_MAX_CRITSECT( OP, 0 ); \ |
| return; \ |
| } |
| #else |
| #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------- |
| #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| while ( old_value OP rhs && /* still need actions? */ \ |
| ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| } \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| // 1-byte, 2-byte operands - use critical section |
| #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| if ( *lhs OP rhs ) { /* need actions? */ \ |
| GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ |
| MIN_MAX_CRITSECT(OP,LCK_ID) \ |
| } \ |
| } |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| // ------------------------------------------------------------------------- |
| // X86 or X86_64: no alignment problems ==================================== |
| #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| if ( *lhs OP rhs ) { \ |
| GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ |
| MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ |
| } \ |
| } |
| |
| #else |
| // ------------------------------------------------------------------------- |
| // Code for other architectures that don't handle unaligned accesses. |
| #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| if ( *lhs OP rhs ) { \ |
| GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \ |
| } \ |
| } \ |
| } |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max |
| MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min |
| MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max |
| MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min |
| MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max |
| MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min |
| MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max |
| MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min |
| MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max |
| MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min |
| MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max |
| MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min |
| #if KMP_HAVE_QUAD |
| MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max |
| MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min |
| #if ( KMP_ARCH_X86 ) |
| MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 |
| MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16 |
| #endif |
| #endif |
| // ------------------------------------------------------------------------ |
// Need separate macros for .EQV. because of the need for the complement (~)
| // OP ignored for critical sections, ^=~ used instead |
| #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| // ------------------------------------------------------------------------ |
| // X86 or X86_64: no alignment problems =================================== |
| #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ |
| OP_CMPXCHG(TYPE,BITS,OP) \ |
| } |
| // ------------------------------------------------------------------------ |
| #else |
| // ------------------------------------------------------------------------ |
| // Code for other architectures that don't handle unaligned accesses. |
| #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv |
| ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv |
| ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv |
| ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv |
| ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv |
| ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv |
| ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv |
| ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv |
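
// Note: for eqv the operator token is "^~", so OP_CMPXCHG computes
// old_value ^ ~rhs, which equals ~(old_value ^ rhs), i.e. Fortran .EQV.;
// the critical-section path achieves the same effect with "^=~".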
| |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| // routines for long double type |
| ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add |
| ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub |
| ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul |
| ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div |
| #if KMP_HAVE_QUAD |
| // routines for _Quad type |
| ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add |
| ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub |
| ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul |
| ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16 |
| ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16 |
| ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 |
| ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 |
| #endif |
| #endif |
| // routines for complex types |
| |
| #if USE_CMPXCHG_FIX |
| // workaround for C78287 (complex(kind=4) data type) |
| ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add |
| ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub |
| ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul |
| ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div |
| // end of the workaround for C78287 |
| #else |
| ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add |
| ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub |
| ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul |
| ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div |
| #endif // USE_CMPXCHG_FIX |
| |
| ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add |
| ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub |
| ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul |
| ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div |
| ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add |
| ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub |
| ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul |
| ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add |
| ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub |
| ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul |
| ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16 |
| ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16 |
| ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 |
| ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 |
| #endif |
| #endif |
| |
| #if OMP_40_ENABLED |
| |
| // OpenMP 4.0: x = expr binop x for non-commutative operations. |
| // Supported only on IA-32 architecture and Intel(R) 64 |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs bound by critical section |
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
// Note: gtid is not checked here as it should always be valid
// (1- and 2-byte entries receive a valid gtid; other sizes check it before this macro)
| #define OP_CRITICAL_REV(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| (*lhs) = (rhs) OP (*lhs); \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_REV(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_REV( OP, 0 ); \ |
| return; \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_REV(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| |
// Beginning of a definition (provides name, parameters, debug trace)
| // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operands' type |
| #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ |
| RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid )); |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| // Note: temp_val introduced in order to force the compiler to read |
| // *lhs only once (w/o it the compiler reads *lhs twice) |
| #define OP_CMPXCHG_REV(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value, new_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs OP old_value; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_DO_PAUSE; \ |
| \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs OP old_value; \ |
| } \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ |
| OP_CMPXCHG_REV(TYPE,BITS,OP) \ |
| } |
| |
| // ------------------------------------------------------------------------ |
// Entry definitions for integer operands
| // TYPE_ID - operands type and size (fixed4, float4) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operand type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator (used in critical section) |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| |
| // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG |
| // ------------------------------------------------------------------------ |
| // Routines for ATOMIC integer operands, other operators |
| // ------------------------------------------------------------------------ |
| // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG |
| ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev |
| ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev |
| ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev |
| ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev |
| |
| ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev |
| ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev |
| ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev |
| ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev |
| |
| ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev |
| ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev |
| ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev |
| ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev |
| |
| ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev |
| ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev |
| ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev |
| ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev |
| ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev |
| |
| ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev |
| ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev |
| |
| ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev |
| ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev |
| // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ |
| OP_CRITICAL_REV(OP,LCK_ID) \ |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| // routines for long double type |
| ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev |
| ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev |
| #if KMP_HAVE_QUAD |
| // routines for _Quad type |
| ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev |
| ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev |
| ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev |
| #endif |
| #endif |
| |
| // routines for complex types |
| ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev |
| ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev |
| ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev |
| ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev |
| ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev |
| ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev |
| ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev |
| ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev |
| #endif |
| #endif |
| |
| |
| #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| // End of OpenMP 4.0: x = expr binop x for non-commutative operations. |
| |
| #endif //OMP_40_ENABLED |
| |
| |
| /* ------------------------------------------------------------------------ */ |
| /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ |
| /* Note: in order to reduce the total number of types combinations */ |
| /* it is supposed that compiler converts RHS to longest floating type,*/ |
| /* that is _Quad, before call to any of these routines */ |
| /* Conversion to _Quad will be done by the compiler during calculation, */ |
| /* conversion back to TYPE - before the assignment, like: */ |
| /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ |
| /* Performance penalty expected because of SW emulation use */ |
| /* ------------------------------------------------------------------------ */ |
| |
| #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| // ------------------------------------------------------------------------- |
| // X86 or X86_64: no alignment problems ==================================== |
| #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG(TYPE,BITS,OP) \ |
| } |
| // ------------------------------------------------------------------------- |
| #else |
| // ------------------------------------------------------------------------ |
| // Code for other architectures that don't handle unaligned accesses. |
| #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
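// Note: MASK is the hexadecimal alignment mask matching the operand size,
// e.g. MASK=7 tests the low three address bits of an 8-byte operand:
//
//   if ( ! ( (kmp_uintptr_t) lhs & 0x7 ) ) {
//       // 8-byte aligned: lock-free compare-and-swap path
//   } else {
//       // unaligned: serialize through the critical section
//   }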
| |
| // RHS=float8 |
| ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8 |
| ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8 |
| ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8 |
| ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8 |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8 |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8 |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8 |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8 |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8 |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8 |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8 |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 |
| |
| // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) |
| #if KMP_HAVE_QUAD |
| ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp |
| ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp |
| ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp |
| ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp |
| ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp |
| |
| ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp |
| ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp |
| ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp |
| ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp |
| ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp |
| |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp |
| ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp |
| ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp |
| |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp |
| ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp |
| ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp |
| |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp |
| ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp |
| |
| ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp |
| ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp |
| ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp |
| ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp |
| |
| ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp |
| ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp |
| ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp |
| ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp |
| #endif |
| |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| // ------------------------------------------------------------------------ |
| // X86 or X86_64: no alignment problems ==================================== |
| #if USE_CMPXCHG_FIX |
| // workaround for C78287 (complex(kind=4) data type) |
| #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ |
| } |
| // end of the second part of the workaround for C78287 |
| #else |
| #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG(TYPE,BITS,OP) \ |
| } |
| #endif // USE_CMPXCHG_FIX |
| #else |
| // ------------------------------------------------------------------------ |
| // Code for other architectures that don't handle unaligned accesses. |
| #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ |
| ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ |
| OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ |
| if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ |
| OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ |
| } else { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ |
| } \ |
| } |
| #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
| |
| ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8 |
| ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8 |
| ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8 |
| ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8 |
| |
| // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| ////////////////////////////////////////////////////////////////////////////////////////////////////// |
| // ------------------------------------------------------------------------ |
| // Atomic READ routines |
| // ------------------------------------------------------------------------ |
| |
| // ------------------------------------------------------------------------ |
// Beginning of a definition (provides name, parameters, debug trace)
| // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operands' type |
| #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ |
| RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store_ret" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| // Note: temp_val introduced in order to force the compiler to read |
| // *lhs only once (w/o it the compiler reads *lhs twice) |
| // TODO: check if it is still necessary |
| // Return old value regardless of the result of "compare & swap# operation |
| |
| #define OP_CMPXCHG_READ(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| union f_i_union { \ |
| TYPE f_val; \ |
| kmp_int##BITS i_val; \ |
| }; \ |
| union f_i_union old_value; \ |
| temp_val = *loc; \ |
| old_value.f_val = temp_val; \ |
| old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \ |
| new_value = old_value.f_val; \ |
| return new_value; \ |
| } |
| |
| // ------------------------------------------------------------------------- |
// Atomic read of *loc bound by critical section
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
| // Note: don't check gtid as it should always be valid |
| // 1, 2-byte - expect valid parameter, other - check before this macro |
| #define OP_CRITICAL_READ(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| new_value = (*loc); \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); |
| |
| // ------------------------------------------------------------------------- |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_READ(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_READ( OP, 0 ); \ |
| return new_value; \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_READ(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ |
| new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \ |
| return new_value; \ |
| } |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ |
| OP_CMPXCHG_READ(TYPE,BITS,OP) \ |
| } |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \ |
| return new_value; \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work. |
| // Let's return the read value through the additional parameter. |
| |
| #if ( KMP_OS_WINDOWS ) |
| |
| #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| (*out) = (*loc); \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_READ_WRK( OP, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| // ------------------------------------------------------------------------ |
| #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ |
| void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); |
| |
| // ------------------------------------------------------------------------ |
| #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ |
| OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \ |
| } |
| |
| #endif // KMP_OS_WINDOWS |
| |
| // ------------------------------------------------------------------------ |
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
| ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd |
| ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd |
| ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd |
| ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd |
| |
| // !!! TODO: Remove lock operations for "char" since it can't be non-atomic |
| ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd |
| ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd |
| |
| ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd |
| #endif // KMP_HAVE_QUAD |
| |
| // Fix for CQ220361 on Windows* OS |
| #if ( KMP_OS_WINDOWS ) |
| ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd |
| #else |
| ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd |
| #endif |
| ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd |
| ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd |
| ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd |
| #endif |
| #endif |
| |
| |
| // ------------------------------------------------------------------------ |
| // Atomic WRITE routines |
| // ------------------------------------------------------------------------ |
| |
| #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ |
| KMP_XCHG_FIXED##BITS( lhs, rhs ); \ |
| } |
| // ------------------------------------------------------------------------ |
| #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ |
| KMP_XCHG_REAL##BITS( lhs, rhs ); \ |
| } |
| |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| // Note: temp_val introduced in order to force the compiler to read |
| // *lhs only once (w/o it the compiler reads *lhs twice) |
| #define OP_CMPXCHG_WR(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value, new_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs; \ |
| } \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ |
| OP_CMPXCHG_WR(TYPE,BITS,OP) \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ |
| OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL(OP,LCK_ID) /* send assignment */ \ |
| } |
| // ------------------------------------------------------------------------- |
| |
| ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr |
| ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr |
| ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr |
| #else |
| ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr |
| #endif |
| |
| ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr |
| #else |
| ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr |
| #endif |
| |
| ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr |
| #endif |
| ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr |
| ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr |
| ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr |
| ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr |
| #endif |
| #endif |
| |
| |
| // ------------------------------------------------------------------------ |
| // Atomic CAPTURE routines |
| // ------------------------------------------------------------------------ |
| |
// Beginning of a definition (provides name, parameters, debug trace)
| // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operands' type |
| #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \ |
| RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); |
| |
| // ------------------------------------------------------------------------- |
| // Operation on *lhs, rhs bound by critical section |
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
| // Note: don't check gtid as it should always be valid |
| // 1, 2-byte - expect valid parameter, other - check before this macro |
| #define OP_CRITICAL_CPT(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if( flag ) { \ |
| (*lhs) OP rhs; \ |
| new_value = (*lhs); \ |
| } else { \ |
| new_value = (*lhs); \ |
| (*lhs) OP rhs; \ |
| } \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return new_value; |
| |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_CPT( OP##=, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_CPT(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| // Note: temp_val introduced in order to force the compiler to read |
| // *lhs only once (w/o it the compiler reads *lhs twice) |
| #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value, new_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = old_value OP rhs; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = old_value OP rhs; \ |
| } \ |
| if( flag ) { \ |
| return new_value; \ |
| } else \ |
| return old_value; \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ |
| OP_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE old_value, new_value; \ |
| OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ |
| /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
| old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ |
| if( flag ) { \ |
| return old_value OP rhs; \ |
| } else \ |
| return old_value; \ |
| } |
| // ------------------------------------------------------------------------- |
| |
| ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt |
| ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt |
| ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt |
| ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt |
| |
| ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt |
| ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt |
| ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt |
| ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt |
| |
| // ------------------------------------------------------------------------ |
| // Entries definition for integer operands |
| // TYPE_ID - operands type and size (fixed4, float4) |
| // OP_ID - operation identifier (add, sub, mul, ...) |
| // TYPE - operand type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator (used in critical section) |
| // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG |
| // ------------------------------------------------------------------------ |
| // Routines for ATOMIC integer operands, other operators |
| // ------------------------------------------------------------------------ |
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
| ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt |
| ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt |
| ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt |
| ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt |
| ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt |
| ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt |
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
| |
| // ------------------------------------------------------------------------ |
| // Routines for C/C++ Reduction operators && and || |
| // ------------------------------------------------------------------------ |
| |
| // ------------------------------------------------------------------------- |
| // Operation on *lhs, rhs bound by critical section |
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
| // Note: don't check gtid as it should always be valid |
| // 1, 2-byte - expect valid parameter, other - check before this macro |
#define OP_CRITICAL_L_CPT(OP,LCK_ID)                                      \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    if( flag ) {                                                          \
        /* capture after: compute the new value, then store it */         \
        new_value OP rhs;                                                 \
        (*lhs) = new_value;                                               \
    } else {                                                              \
        /* capture before: remember the old value, then update *lhs */    \
        new_value = (*lhs);                                               \
        (*lhs) OP rhs;                                                    \
    }                                                                     \
                                                                          \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
| |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_L_CPT( OP, 0 ); \ |
| return new_value; \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------ |
// Need separate macros for &&, || because there is no combined assignment operator
| #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \ |
| OP_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| } |
| |
| ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt |
| ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt |
| ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt |
| ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt |
| ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt |
| ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt |
| ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt |
| ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt |
| |
| |
| // ------------------------------------------------------------------------- |
// Routines for Fortran operators that have no counterpart in C:
| // MAX, MIN, .EQV., .NEQV. |
| // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt |
| // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt |
| // ------------------------------------------------------------------------- |
| |
| // ------------------------------------------------------------------------- |
| // MIN and MAX need separate macros |
// OP - comparison operator used to check whether an update is still needed
| #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if ( *lhs OP rhs ) { /* still need actions? */ \ |
| old_value = *lhs; \ |
| *lhs = rhs; \ |
| if ( flag ) \ |
| new_value = rhs; \ |
| else \ |
| new_value = old_value; \ |
| } \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return new_value; \ |
| |
| // ------------------------------------------------------------------------- |
| #ifdef KMP_GOMP_COMPAT |
| #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \ |
| if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ |
| KMP_CHECK_GTID; \ |
| MIN_MAX_CRITSECT_CPT( OP, 0 ); \ |
| } |
| #else |
| #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------- |
| #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| /*TYPE old_value; */ \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| while ( old_value OP rhs && /* still need actions? */ \ |
| ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| } \ |
| if( flag ) \ |
| return rhs; \ |
| else \ |
| return old_value; \ |
| } |
| |
| // ------------------------------------------------------------------------- |
// Operands with no lock-free compare-and-swap path - use critical section
| #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value, old_value; \ |
| if ( *lhs OP rhs ) { /* need actions? */ \ |
| GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ |
| MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ |
| } \ |
| return *lhs; \ |
| } |
| |
| #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value, old_value; \ |
| if ( *lhs OP rhs ) { \ |
| GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ |
| MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| } \ |
| return *lhs; \ |
| } |
| |
| |
| MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt |
| MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt |
| MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt |
| MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt |
| MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt |
| MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt |
| #if KMP_HAVE_QUAD |
| MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt |
| MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt |
| #if ( KMP_ARCH_X86 ) |
| MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt |
MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
| #endif |
| #endif |
| |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_CPT( OP, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| // ------------------------------------------------------------------------ |
| #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \ |
| OP_CMPXCHG_CPT(TYPE,BITS,OP) \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| |
| ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt |
| ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt |
| ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt |
| ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt |
| ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt |
| ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt |
| ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt |
| ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ |
| OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| |
| // Workaround for cmplx4. Regular routines with return value don't work |
| // on Win_32e. Let's return captured values through the additional parameter. |
| #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if( flag ) { \ |
| (*lhs) OP rhs; \ |
| (*out) = (*lhs); \ |
| } else { \ |
| (*out) = (*lhs); \ |
| (*lhs) OP rhs; \ |
| } \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return; |
| // ------------------------------------------------------------------------ |
| |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_CPT_WRK( OP##=, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| // ------------------------------------------------------------------------ |
| |
| #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ |
| void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); |
| // ------------------------------------------------------------------------ |
| |
| #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ |
| OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \ |
| OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \ |
| } |
| // The end of workaround for cmplx4 |
| |
| /* ------------------------------------------------------------------------- */ |
| // routines for long double type |
| ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt |
| ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt |
| ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt |
| ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt |
| #if KMP_HAVE_QUAD |
| // routines for _Quad type |
| ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt |
| ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt |
| ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt |
| ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt |
| ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt |
| ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt |
| ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt |
| #endif |
| #endif |
| |
| // routines for complex types |
| |
| // cmplx4 routines to return void |
| ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt |
| ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt |
| ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt |
| ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt |
| |
| ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt |
| ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt |
| ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt |
| ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt |
| ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt |
| ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt |
| ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt |
| ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt |
| ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt |
| #endif |
| #endif |
| |
| #if OMP_40_ENABLED |
| |
// OpenMP 4.0 capture forms for non-commutative operations:
//     v = x = expr binop x;
//     { v = x; x = expr binop x; }
//     { x = expr binop x; v = x; }
| // Supported only on IA-32 architecture and Intel(R) 64 |
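// For example, for a 32-bit integer x the reversed capture form
//     { x = expr - x; v = x; }          // capture *after* -> flag == 1
// may be lowered to
//     v = __kmpc_atomic_fixed4_sub_cpt_rev( id_ref, gtid, &x, expr, 1 );
// flag == 0 corresponds to  { v = x; x = expr - x; }.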
| |
| // ------------------------------------------------------------------------- |
| // Operation on *lhs, rhs bound by critical section |
| // OP - operator (it's supposed to contain an assignment) |
| // LCK_ID - lock identifier |
| // Note: don't check gtid as it should always be valid |
| // 1, 2-byte - expect valid parameter, other - check before this macro |
| #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if( flag ) { \ |
| /*temp_val = (*lhs);*/\ |
| (*lhs) = (rhs) OP (*lhs); \ |
| new_value = (*lhs); \ |
| } else { \ |
| new_value = (*lhs);\ |
| (*lhs) = (rhs) OP (*lhs); \ |
| } \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return new_value; |
| |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_CPT_REV( OP, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| // ------------------------------------------------------------------------ |
| // Operation on *lhs, rhs using "compare_and_store" routine |
| // TYPE - operands' type |
| // BITS - size in bits, used to distinguish low level calls |
| // OP - operator |
| // Note: temp_val introduced in order to force the compiler to read |
| // *lhs only once (w/o it the compiler reads *lhs twice) |
| #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value, new_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs OP old_value; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs OP old_value; \ |
| } \ |
| if( flag ) { \ |
| return new_value; \ |
| } else \ |
| return old_value; \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ |
| OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ |
| } |
| |
| |
| ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev |
| ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev |
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
| |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| // TYPE_ID, OP_ID, TYPE - detailed above |
| // OP - operator |
| // LCK_ID - lock identifier, used to possibly distinguish lock variable |
| #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ |
| TYPE new_value; \ |
| /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\ |
| OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ |
| OP_CRITICAL_CPT_REV(OP,LCK_ID) \ |
| } |
| |
| |
| /* ------------------------------------------------------------------------- */ |
| // routines for long double type |
| ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev |
| #if KMP_HAVE_QUAD |
| // routines for _Quad type |
| ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev |
| #endif |
| #endif |
| |
| // routines for complex types |
| |
| // ------------------------------------------------------------------------ |
| |
| // Workaround for cmplx4. Regular routines with return value don't work |
| // on Win_32e. Let's return captured values through the additional parameter. |
| #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| if( flag ) { \ |
| (*lhs) = (rhs) OP (*lhs); \ |
| (*out) = (*lhs); \ |
| } else { \ |
| (*out) = (*lhs); \ |
| (*lhs) = (rhs) OP (*lhs); \ |
| } \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return; |
| // ------------------------------------------------------------------------ |
| |
| #ifdef KMP_GOMP_COMPAT |
| #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \ |
| } |
| #else |
| #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| // ------------------------------------------------------------------------ |
| |
| #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ |
| OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \ |
| OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ |
| } |
| // The end of workaround for cmplx4 |
| |
| |
| // !!! TODO: check if we need to return void for cmplx4 routines |
| // cmplx4 routines to return void |
| ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev |
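
// By analogy with ATOMIC_BEGIN_SWP_WRK below, ATOMIC_BEGIN_WRK presumably emits a void
// routine that takes the extra output parameter plus the capture flag, i.e. roughly:
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs,
//                                          kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
//
// The caller reads the captured value through `out`, sidestepping the Win_32e
// return-value problem described above.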
| |
| ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev |
| ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev |
| #endif |
| #endif |
| |
| // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} |
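
// For example, a compiler may lower
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// for a 4-byte integer x into a call such as (sketch):
//
//   v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, expr );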
| |
| #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ |
| TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); |
| |
| #define CRITICAL_SWP(LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| old_value = (*lhs); \ |
| (*lhs) = rhs; \ |
| \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return old_value; |
| |
| // ------------------------------------------------------------------------ |
| #ifdef KMP_GOMP_COMPAT |
| #define GOMP_CRITICAL_SWP(FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| CRITICAL_SWP( 0 ); \ |
| } |
| #else |
| #define GOMP_CRITICAL_SWP(FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| |
| |
| #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ |
| ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ |
| TYPE old_value; \ |
| GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
| old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \ |
| return old_value; \ |
| } |
| // ------------------------------------------------------------------------ |
| #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ |
| ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ |
| TYPE old_value; \ |
| GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
| old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \ |
| return old_value; \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| #define CMPXCHG_SWP(TYPE,BITS) \ |
| { \ |
| TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
| TYPE old_value, new_value; \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs; \ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ |
| *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ |
| { \ |
| KMP_CPU_PAUSE(); \ |
| \ |
| temp_val = *lhs; \ |
| old_value = temp_val; \ |
| new_value = rhs; \ |
| } \ |
| return old_value; \ |
| } |
| |
| // ------------------------------------------------------------------------- |
| #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ |
| ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ |
| TYPE old_value; \ |
| GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
| CMPXCHG_SWP(TYPE,BITS) \ |
| } |
| |
| ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp |
| ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp |
| ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp |
| |
| ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp |
| |
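// On IA-32 a 64-bit exchange is not available as a single instruction, so the 8-byte swaps
// fall back to the compare-and-store retry loop (presumably cmpxchg8b underneath
// KMP_COMPARE_AND_STORE_ACQ64); elsewhere a plain 64-bit exchange suffices.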
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp |
| ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp |
| #else |
| ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp |
| ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp |
| #endif |
| |
| // ------------------------------------------------------------------------ |
| // Routines for Extended types: long double, _Quad, complex flavours (use critical section) |
| #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ |
| TYPE old_value; \ |
| GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
| CRITICAL_SWP(LCK_ID) \ |
| } |
| |
| // ------------------------------------------------------------------------ |
| |
| // !!! TODO: check if we need to return void for cmplx4 routines |
| // Workaround for cmplx4. Regular routines with return value don't work |
| // on Win_32e. Let's return captured values through the additional parameter. |
| |
| #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ |
| void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \ |
| { \ |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); \ |
| KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); |
| |
| |
| #define CRITICAL_SWP_WRK(LCK_ID) \ |
| __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| \ |
| tmp = (*lhs); \ |
| (*lhs) = (rhs); \ |
| (*out) = tmp; \ |
| __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ |
| return; |
| |
| // ------------------------------------------------------------------------ |
| |
| #ifdef KMP_GOMP_COMPAT |
| #define GOMP_CRITICAL_SWP_WRK(FLAG) \ |
| if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ |
| KMP_CHECK_GTID; \ |
| CRITICAL_SWP_WRK( 0 ); \ |
| } |
| #else |
| #define GOMP_CRITICAL_SWP_WRK(FLAG) |
| #endif /* KMP_GOMP_COMPAT */ |
| // ------------------------------------------------------------------------ |
| |
| #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \ |
| ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ |
| TYPE tmp; \ |
| GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ |
| CRITICAL_SWP_WRK(LCK_ID) \ |
| } |
| // The end of workaround for cmplx4 |
| |
| |
| ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp |
| #endif |
| // cmplx4 routine to return void |
| ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp |
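// By direct expansion of ATOMIC_BEGIN_SWP_WRK, the resulting prototype is:
//   void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );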
| |
| //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp |
| |
| |
| ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp |
| ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp |
| #if KMP_HAVE_QUAD |
| ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp |
| #if ( KMP_ARCH_X86 ) |
| ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp |
| ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp |
| #endif |
| #endif |
| |
| |
| // End of OpenMP 4.0 Capture |
| |
| #endif //OMP_40_ENABLED |
| |
| #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| |
| |
| #undef OP_CRITICAL |
| |
| /* ------------------------------------------------------------------------ */ |
| /* Generic atomic routines */ |
| /* ------------------------------------------------------------------------ */ |
| |
| void |
| __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| if ( |
| #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
| FALSE /* must use lock */ |
| #else |
| TRUE |
| #endif |
| ) |
| { |
| kmp_int8 old_value, new_value; |
| |
| old_value = *(kmp_int8 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| |
| /* TODO: Should this be acquire or release? */ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs, |
| *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) ) |
| { |
| KMP_CPU_PAUSE(); |
| |
| old_value = *(kmp_int8 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| } |
| |
| return; |
| } |
| else { |
| // |
| // All 1-byte data is of integer data type. |
| // |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid ); |
| } |
| } |
| |
| void |
| __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| if ( |
| #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
| FALSE /* must use lock */ |
| #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| TRUE /* no alignment problems */ |
| #else |
| ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */ |
| #endif |
| ) |
| { |
| kmp_int16 old_value, new_value; |
| |
| old_value = *(kmp_int16 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| |
| /* TODO: Should this be acquire or release? */ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs, |
| *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) ) |
| { |
| KMP_CPU_PAUSE(); |
| |
| old_value = *(kmp_int16 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| } |
| |
| return; |
| } |
| else { |
| // |
| // All 2-byte data is of integer data type. |
| // |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid ); |
| } |
| } |
| |
| void |
| __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| if ( |
| // |
| // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. |
| // Gomp compatibility is broken if this routine is called for floats. |
| // |
| #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| TRUE /* no alignment problems */ |
| #else |
| ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */ |
| #endif |
| ) |
| { |
| kmp_int32 old_value, new_value; |
| |
| old_value = *(kmp_int32 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| |
| /* TODO: Should this be acquire or release? */ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs, |
| *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) ) |
| { |
| KMP_CPU_PAUSE(); |
| |
| old_value = *(kmp_int32 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| } |
| |
| return; |
| } |
| else { |
| // |
| // Use __kmp_atomic_lock_4i for all 4-byte data, |
| // even if it isn't of integer data type. |
| // |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid ); |
| } |
| } |
| |
| void |
| __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
    if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
| FALSE /* must use lock */ |
| #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 |
| TRUE /* no alignment problems */ |
| #else |
| ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */ |
| #endif |
| ) |
| { |
| kmp_int64 old_value, new_value; |
| |
| old_value = *(kmp_int64 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| /* TODO: Should this be acquire or release? */ |
| while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs, |
| *(kmp_int64 *) &old_value, |
| *(kmp_int64 *) &new_value ) ) |
| { |
| KMP_CPU_PAUSE(); |
| |
| old_value = *(kmp_int64 *) lhs; |
| (*f)( &new_value, &old_value, rhs ); |
| } |
| |
| return; |
    }
    else {
| // |
| // Use __kmp_atomic_lock_8i for all 8-byte data, |
| // even if it isn't of integer data type. |
| // |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid ); |
| } |
| } |
| |
| void |
| __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid ); |
| } |
| |
| void |
| __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid ); |
| } |
| |
| void |
| __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid ); |
| } |
| |
| void |
| __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) |
| { |
| KMP_DEBUG_ASSERT( __kmp_init_serial ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid ); |
| |
| (*f)( lhs, lhs, rhs ); |
| |
| #ifdef KMP_GOMP_COMPAT |
| if ( __kmp_atomic_mode == 2 ) { |
| __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); |
| } |
| else |
| #endif /* KMP_GOMP_COMPAT */ |
| __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid ); |
| } |
| |
// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler;
//     duplicated in order not to use third-party names in pure Intel code
| // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. |
| void |
| __kmpc_atomic_start(void) |
| { |
| int gtid = __kmp_entry_gtid(); |
| KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); |
| __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); |
| } |
| |
| |
| void |
| __kmpc_atomic_end(void) |
| { |
| int gtid = __kmp_get_gtid(); |
| KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); |
| __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); |
| } |
| |
| /* ------------------------------------------------------------------------ */ |
| /* ------------------------------------------------------------------------ */ |
| /*! |
| @} |
| */ |
| |
| // end of file |