| /* |
| * kmp_collapse.h -- header for loop collapse feature |
| */ |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef KMP_COLLAPSE_H |
| #define KMP_COLLAPSE_H |
| |
| #include <type_traits> |
| |
| // Type of the index into the loop nest structures |
| // (with values from 0 to less than n from collapse(n)) |
| typedef kmp_int32 kmp_index_t; |
| |
| // Type for combined loop nest space IV: |
| typedef kmp_uint64 kmp_loop_nest_iv_t; |
| |
| // Loop has <, <=, etc. as a comparison: |
| enum comparison_t : kmp_int32 { |
| comp_less_or_eq = 0, |
| comp_greater_or_eq = 1, |
| comp_not_eq = 2, |
| comp_less = 3, |
| comp_greater = 4 |
| }; |
| |
| // Type of loop IV. |
| // Type of bounds and step, after usual promotions |
| // are a subset of these types (32 & 64 only): |
| enum loop_type_t : kmp_int32 { |
| loop_type_uint8 = 0, |
| loop_type_int8 = 1, |
| loop_type_uint16 = 2, |
| loop_type_int16 = 3, |
| loop_type_uint32 = 4, |
| loop_type_int32 = 5, |
| loop_type_uint64 = 6, |
| loop_type_int64 = 7 |
| }; |
| |
| // Defining loop types to handle special cases |
| enum nested_loop_type_t : kmp_int32 { |
| nested_loop_type_unkown = 0, |
| nested_loop_type_lower_triangular_matrix = 1, |
| nested_loop_type_upper_triangular_matrix = 2 |
| }; |
| |
| /*! |
| @ingroup WORK_SHARING |
| * Describes the structure for rectangular nested loops. |
| */ |
| template <typename T> struct bounds_infoXX_template { |
| |
| // typedef typename traits_t<T>::unsigned_t UT; |
| typedef typename traits_t<T>::signed_t ST; |
| |
| loop_type_t loop_type; // The differentiator |
| loop_type_t loop_iv_type; |
| comparison_t comparison; |
| // outer_iv should be 0 (or any other less then number of dimentions) |
| // if loop doesn't depend on it (lb1 and ub1 will be 0). |
| // This way we can do multiplication without a check. |
| kmp_index_t outer_iv; |
| |
| // unions to keep the size constant: |
| union { |
| T lb0; |
| kmp_uint64 lb0_u64; // real type can be signed |
| }; |
| |
| union { |
| T lb1; |
| kmp_uint64 lb1_u64; // real type can be signed |
| }; |
| |
| union { |
| T ub0; |
| kmp_uint64 ub0_u64; // real type can be signed |
| }; |
| |
| union { |
| T ub1; |
| kmp_uint64 ub1_u64; // real type can be signed |
| }; |
| |
| union { |
| ST step; // signed even if bounds type is unsigned |
| kmp_int64 step_64; // signed |
| }; |
| |
| kmp_loop_nest_iv_t trip_count; |
| }; |
| |
| /*! |
| @ingroup WORK_SHARING |
| * Interface struct for rectangular nested loops. |
| * Same size as bounds_infoXX_template. |
| */ |
| struct bounds_info_t { |
| |
| loop_type_t loop_type; // The differentiator |
| loop_type_t loop_iv_type; |
| comparison_t comparison; |
| // outer_iv should be 0 (or any other less then number of dimentions) |
| // if loop doesn't depend on it (lb1 and ub1 will be 0). |
| // This way we can do multiplication without a check. |
| kmp_index_t outer_iv; |
| |
| kmp_uint64 lb0_u64; // real type can be signed |
| kmp_uint64 lb1_u64; // real type can be signed |
| kmp_uint64 ub0_u64; // real type can be signed |
| kmp_uint64 ub1_u64; // real type can be signed |
| kmp_int64 step_64; // signed |
| |
| // This is internal, but it's the only internal thing we need |
| // in rectangular case, so let's expose it here: |
| kmp_loop_nest_iv_t trip_count; |
| }; |
| |
| //------------------------------------------------------------------------- |
| // Additional types for internal representation: |
| |
| // Array for a point in the loop space, in the original space. |
| // It's represented in kmp_uint64, but each dimention is calculated in |
| // that loop IV type. Also dimentions have to be converted to those types |
| // when used in generated code. |
| typedef kmp_uint64 *kmp_point_t; |
| |
| // Array: Number of loop iterations on each nesting level to achieve some point, |
| // in expanded space or in original space. |
| // OMPTODO: move from using iterations to using offsets (iterations multiplied |
| // by steps). For those we need to be careful with the types, as step can be |
| // negative, but it'll remove multiplications and divisions in several places. |
| typedef kmp_loop_nest_iv_t *kmp_iterations_t; |
| |
| // Internal struct with additional info: |
| template <typename T> struct bounds_info_internalXX_template { |
| |
| // OMPTODO: should span have type T or should it better be |
| // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than |
| // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to |
| // use big_span_t, if it can be resolved at compile time. |
| typedef |
| typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> |
| big_span_t; |
| |
| // typedef typename big_span_t span_t; |
| typedef T span_t; |
| |
| bounds_infoXX_template<T> b; // possibly adjusted bounds |
| |
| // Leaving this as a union in case we'll switch to span_t with different sizes |
| // (depending on T) |
| union { |
| // Smallest possible value of iv (may be smaller than actually possible) |
| span_t span_smallest; |
| kmp_uint64 span_smallest_u64; |
| }; |
| |
| // Leaving this as a union in case we'll switch to span_t with different sizes |
| // (depending on T) |
| union { |
| // Biggest possible value of iv (may be bigger than actually possible) |
| span_t span_biggest; |
| kmp_uint64 span_biggest_u64; |
| }; |
| |
| // Did we adjust loop bounds (not counting canonicalization)? |
| bool loop_bounds_adjusted; |
| }; |
| |
| // Internal struct with additional info: |
| struct bounds_info_internal_t { |
| |
| bounds_info_t b; // possibly adjusted bounds |
| |
| // Smallest possible value of iv (may be smaller than actually possible) |
| kmp_uint64 span_smallest_u64; |
| |
| // Biggest possible value of iv (may be bigger than actually possible) |
| kmp_uint64 span_biggest_u64; |
| |
| // Did we adjust loop bounds (not counting canonicalization)? |
| bool loop_bounds_adjusted; |
| }; |
| |
| //----------APIs for rectangular loop nests-------------------------------- |
| |
| // Canonicalize loop nest and calculate overall trip count. |
| // "bounds_nest" has to be allocated per thread. |
| // API will modify original bounds_nest array to bring it to a canonical form |
| // (only <= and >=, no !=, <, >). If the original loop nest was already in a |
| // canonical form there will be no changes to bounds in bounds_nest array |
| // (only trip counts will be calculated). |
| // Returns trip count of overall space. |
| extern "C" kmp_loop_nest_iv_t |
| __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, |
| /*in/out*/ bounds_info_t *original_bounds_nest, |
| kmp_index_t n); |
| |
| // Calculate old induction variables corresponding to overall new_iv. |
| // Note: original IV will be returned as if it had kmp_uint64 type, |
| // will have to be converted to original type in user code. |
| // Note: trip counts should be already calculated by |
| // __kmpc_process_loop_nest_rectang. |
| // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline |
| // that into user code. |
| extern "C" void |
| __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, |
| const bounds_info_t *original_bounds_nest, |
| /*out*/ kmp_uint64 *original_ivs, |
| kmp_index_t n); |
| |
| //----------Init API for non-rectangular loops-------------------------------- |
| |
| // Init API for collapsed loops (static, no chunks defined). |
| // "bounds_nest" has to be allocated per thread. |
| // API will modify original bounds_nest array to bring it to a canonical form |
| // (only <= and >=, no !=, <, >). If the original loop nest was already in a |
| // canonical form there will be no changes to bounds in bounds_nest array |
| // (only trip counts will be calculated). Internally API will expand the space |
| // to parallelogram/parallelepiped, calculate total, calculate bounds for the |
| // chunks in terms of the new IV, re-calc them in terms of old IVs (especially |
| // important on the left side, to hit the lower bounds and not step over), and |
| // pick the correct chunk for this thread (so it will calculate chunks up to the |
| // needed one). It could be optimized to calculate just this chunk, potentially |
| // a bit less well distributed among threads. It is designed to make sure that |
| // threads will receive predictable chunks, deterministically (so that next nest |
| // of loops with similar characteristics will get exactly same chunks on same |
| // threads). |
| // Current contract: chunk_bounds_nest has only lb0 and ub0, |
| // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). |
| extern "C" kmp_int32 |
| __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, |
| /*in/out*/ bounds_info_t *original_bounds_nest, |
| /*out*/ bounds_info_t *chunk_bounds_nest, |
| kmp_index_t n, |
| /*out*/ kmp_int32 *plastiter); |
| |
| #endif // KMP_COLLAPSE_H |