LLVM OpenMP* Runtime Library
kmp_collapse.h
1 /*
2  * kmp_collapse.h -- header for loop collapse feature
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_COLLAPSE_H
14 #define KMP_COLLAPSE_H
15 
16 #include <type_traits>
17 
18 // Type of the index into the loop nest structures
19 // (with values from 0 to less than n from collapse(n))
20 typedef kmp_int32 kmp_index_t;
21 
22 // Type for combined loop nest space IV:
23 typedef kmp_uint64 kmp_loop_nest_iv_t;
24 
25 // Loop has <, <=, etc. as a comparison:
26 enum comparison_t : kmp_int32 {
27  comp_less_or_eq = 0,
28  comp_greater_or_eq = 1,
29  comp_not_eq = 2,
30  comp_less = 3,
31  comp_greater = 4
32 };
33 
34 // Type of loop IV.
35 // Type of bounds and step, after usual promotions
36 // are a subset of these types (32 & 64 only):
37 enum loop_type_t : kmp_int32 {
38  loop_type_uint8 = 0,
39  loop_type_int8 = 1,
40  loop_type_uint16 = 2,
41  loop_type_int16 = 3,
42  loop_type_uint32 = 4,
43  loop_type_int32 = 5,
44  loop_type_uint64 = 6,
45  loop_type_int64 = 7
46 };
47 
52 template <typename T> struct bounds_infoXX_template {
53 
54  // typedef typename traits_t<T>::unsigned_t UT;
55  typedef typename traits_t<T>::signed_t ST;
56 
57  loop_type_t loop_type; // The differentiator
58  loop_type_t loop_iv_type;
59  comparison_t comparison;
60  // outer_iv should be 0 (or any other less then number of dimentions)
61  // if loop doesn't depend on it (lb1 and ub1 will be 0).
62  // This way we can do multiplication without a check.
63  kmp_index_t outer_iv;
64 
65  // unions to keep the size constant:
66  union {
67  T lb0;
68  kmp_uint64 lb0_u64; // real type can be signed
69  };
70 
71  union {
72  T lb1;
73  kmp_uint64 lb1_u64; // real type can be signed
74  };
75 
76  union {
77  T ub0;
78  kmp_uint64 ub0_u64; // real type can be signed
79  };
80 
81  union {
82  T ub1;
83  kmp_uint64 ub1_u64; // real type can be signed
84  };
85 
86  union {
87  ST step; // signed even if bounds type is unsigned
88  kmp_int64 step_64; // signed
89  };
90 
91  kmp_loop_nest_iv_t trip_count;
92 };
93 
99 struct bounds_info_t {
100 
101  loop_type_t loop_type; // The differentiator
102  loop_type_t loop_iv_type;
103  comparison_t comparison;
104  // outer_iv should be 0 (or any other less then number of dimentions)
105  // if loop doesn't depend on it (lb1 and ub1 will be 0).
106  // This way we can do multiplication without a check.
107  kmp_index_t outer_iv;
108 
109  kmp_uint64 lb0_u64; // real type can be signed
110  kmp_uint64 lb1_u64; // real type can be signed
111  kmp_uint64 ub0_u64; // real type can be signed
112  kmp_uint64 ub1_u64; // real type can be signed
113  kmp_int64 step_64; // signed
114 
115  // This is internal, but it's the only internal thing we need
116  // in rectangular case, so let's expose it here:
117  kmp_loop_nest_iv_t trip_count;
118 };
119 
120 //-------------------------------------------------------------------------
121 // Additional types for internal representation:
122 
123 // Array for a point in the loop space, in the original space.
124 // It's represented in kmp_uint64, but each dimention is calculated in
125 // that loop IV type. Also dimentions have to be converted to those types
126 // when used in generated code.
127 typedef kmp_uint64* kmp_point_t;
128 
129 // Array: Number of loop iterations on each nesting level to achieve some point,
130 // in expanded space or in original space.
131 // OMPTODO: move from using iterations to using offsets (iterations multiplied
132 // by steps). For those we need to be careful with the types, as step can be
133 // negative, but it'll remove multiplications and divisions in several places.
134 typedef kmp_loop_nest_iv_t* kmp_iterations_t;
135 
136 // Internal struct with additional info:
137 template <typename T> struct bounds_info_internalXX_template {
138 
139  // OMPTODO: should span have type T or should it better be
140  // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
141  // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
142  // use big_span_t, if it can be resolved at compile time.
143  typedef
144  typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
145  big_span_t;
146 
147  // typedef typename big_span_t span_t;
148  typedef T span_t;
149 
150  bounds_infoXX_template<T> b; // possibly adjusted bounds
151 
152  // Leaving this as a union in case we'll switch to span_t with different sizes
153  // (depending on T)
154  union {
155  // Smallest possible value of iv (may be smaller than actually possible)
156  span_t span_smallest;
157  kmp_uint64 span_smallest_u64;
158  };
159 
160  // Leaving this as a union in case we'll switch to span_t with different sizes
161  // (depending on T)
162  union {
163  // Biggest possible value of iv (may be bigger than actually possible)
164  span_t span_biggest;
165  kmp_uint64 span_biggest_u64;
166  };
167 
168  // Did we adjust loop bounds (not counting canonicalization)?
169  bool loop_bounds_adjusted;
170 };
171 
172 // Internal struct with additional info:
173 struct bounds_info_internal_t {
174 
175  bounds_info_t b; // possibly adjusted bounds
176 
177  // Smallest possible value of iv (may be smaller than actually possible)
178  kmp_uint64 span_smallest_u64;
179 
180  // Biggest possible value of iv (may be bigger than actually possible)
181  kmp_uint64 span_biggest_u64;
182 
183  // Did we adjust loop bounds (not counting canonicalization)?
184  bool loop_bounds_adjusted;
185 };
186 
187 //----------APIs for rectangular loop nests--------------------------------
188 
189 // Canonicalize loop nest and calculate overall trip count.
190 // "bounds_nest" has to be allocated per thread.
191 // API will modify original bounds_nest array to bring it to a canonical form
192 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
193 // canonical form there will be no changes to bounds in bounds_nest array
194 // (only trip counts will be calculated).
195 // Returns trip count of overall space.
196 extern "C" kmp_loop_nest_iv_t
197 __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
198  /*in/out*/ bounds_info_t *original_bounds_nest,
199  kmp_index_t n);
200 
201 // Calculate old induction variables corresponding to overall new_iv.
202 // Note: original IV will be returned as if it had kmp_uint64 type,
203 // will have to be converted to original type in user code.
204 // Note: trip counts should be already calculated by
205 // __kmpc_process_loop_nest_rectang.
206 // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
207 // that into user code.
208 extern "C" void
209 __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
210  const bounds_info_t *original_bounds_nest,
211  /*out*/ kmp_uint64 *original_ivs,
212  kmp_index_t n);
213 
214 //----------Init API for non-rectangular loops--------------------------------
215 
216 // Init API for collapsed loops (static, no chunks defined).
217 // "bounds_nest" has to be allocated per thread.
218 // API will modify original bounds_nest array to bring it to a canonical form
219 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
220 // canonical form there will be no changes to bounds in bounds_nest array
221 // (only trip counts will be calculated). Internally API will expand the space
222 // to parallelogram/parallelepiped, calculate total, calculate bounds for the
223 // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
224 // important on the left side, to hit the lower bounds and not step over), and
225 // pick the correct chunk for this thread (so it will calculate chunks up to the
226 // needed one). It could be optimized to calculate just this chunk, potentially
227 // a bit less well distributed among threads. It is designed to make sure that
228 // threads will receive predictable chunks, deterministically (so that next nest
229 // of loops with similar characteristics will get exactly same chunks on same
230 // threads).
231 // Current contract: chunk_bounds_nest has only lb0 and ub0,
232 // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
233 extern "C" kmp_int32
234 __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
235  /*in/out*/ bounds_info_t *original_bounds_nest,
236  /*out*/ bounds_info_t *chunk_bounds_nest,
237  kmp_index_t n,
238  /*out*/ kmp_int32 *plastiter);
239 
240 #endif // KMP_COLLAPSE_H
Definition: kmp.h:235