LLVM OpenMP* Runtime Library
kmp_sched.cpp
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15  NOTE: team->t.t_nproc is constant inside any dispatch loop, but it may
16  change between parallel regions. __kmp_max_nth is the largest value
17  __kmp_nth may take; 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
30 #ifdef KMP_DEBUG
31 //-------------------------------------------------------------------------
32 // traits_t<> format specifiers for debug prints ( d, u, lld, llu )
33 char const *traits_t<int>::spec = "d";
34 char const *traits_t<unsigned int>::spec = "u";
35 char const *traits_t<long long>::spec = "lld";
36 char const *traits_t<unsigned long long>::spec = "llu";
37 char const *traits_t<long>::spec = "ld";
38 //-------------------------------------------------------------------------
39 #endif
40 
41 #if KMP_STATS_ENABLED
42 #define KMP_STATS_LOOP_END(stat) \
43  { \
44  kmp_int64 t; \
45  kmp_int64 u = (kmp_int64)(*pupper); \
46  kmp_int64 l = (kmp_int64)(*plower); \
47  kmp_int64 i = (kmp_int64)incr; \
48  if (i == 1) { \
49  t = u - l + 1; \
50  } else if (i == -1) { \
51  t = l - u + 1; \
52  } else if (i > 0) { \
53  t = (u - l) / i + 1; \
54  } else { \
55  t = (l - u) / (-i) + 1; \
56  } \
57  KMP_COUNT_VALUE(stat, t); \
58  KMP_POP_PARTITIONED_TIMER(); \
59  }
60 #else
61 #define KMP_STATS_LOOP_END(stat) /* Nothing */
62 #endif
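// KMP_STATS_LOOP_END recomputes the iteration count of the chunk this thread
// ended up with (from the final *plower/*pupper and incr, using the same
// four-case formula as the trip-count computation below) and records it in
// the given statistic before popping the partitioned timer. With stats
// disabled it expands to nothing, so every early-return path can invoke it
// unconditionally.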
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65 static inline void check_loc(ident_t *&loc) {
66  if (loc == NULL)
67  loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
70 template <typename T>
71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72  kmp_int32 schedtype, kmp_int32 *plastiter,
73  T *plower, T *pupper,
74  typename traits_t<T>::signed_t *pstride,
75  typename traits_t<T>::signed_t incr,
76  typename traits_t<T>::signed_t chunk
77 #if OMPT_SUPPORT && OMPT_OPTIONAL
78  ,
79  void *codeptr
80 #endif
81 ) {
82  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85 
86  // Clear monotonic/nonmonotonic bits (they are ignored here)
87  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);
88 
89  typedef typename traits_t<T>::unsigned_t UT;
90  typedef typename traits_t<T>::signed_t ST;
91  /* this all has to be changed back to TID and such.. */
92  kmp_int32 gtid = global_tid;
93  kmp_uint32 tid;
94  kmp_uint32 nth;
95  UT trip_count;
96  kmp_team_t *team;
97  __kmp_assert_valid_gtid(gtid);
98  kmp_info_t *th = __kmp_threads[gtid];
99 
100 #if OMPT_SUPPORT && OMPT_OPTIONAL
101  ompt_team_info_t *team_info = NULL;
102  ompt_task_info_t *task_info = NULL;
103  ompt_work_t ompt_work_type = ompt_work_loop;
104 
105  static kmp_int8 warn = 0;
106 
107  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
108  // Only fully initialize variables needed by OMPT if OMPT is enabled.
109  team_info = __ompt_get_teaminfo(0, NULL);
110  task_info = __ompt_get_task_info_object(0);
111  // Determine workshare type
112  if (loc != NULL) {
113  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
114  ompt_work_type = ompt_work_loop;
115  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
116  ompt_work_type = ompt_work_sections;
117  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
118  ompt_work_type = ompt_work_distribute;
119  } else {
120  kmp_int8 bool_res =
121  KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
122  if (bool_res)
123  KMP_WARNING(OmptOutdatedWorkshare);
124  }
125  KMP_DEBUG_ASSERT(ompt_work_type);
126  }
127  }
128 #endif
129 
130  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
131  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
132 #ifdef KMP_DEBUG
133  {
134  char *buff;
135  // create format specifiers before the debug output
136  buff = __kmp_str_format(
137  "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
138  " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
139  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
140  traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
141  KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
142  *pstride, incr, chunk));
143  __kmp_str_free(&buff);
144  }
145 #endif
146 
147  if (__kmp_env_consistency_check) {
148  __kmp_push_workshare(global_tid, ct_pdo, loc);
149  if (incr == 0) {
150  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
151  loc);
152  }
153  }
154  /* special handling for zero-trip loops */
155  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
156  if (plastiter != NULL)
157  *plastiter = FALSE;
158  /* leave pupper and plower set to entire iteration space */
159  *pstride = incr; /* value should never be used */
160 // *plower = *pupper - incr;
161 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
162 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
163 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
164 #ifdef KMP_DEBUG
165  {
166  char *buff;
167  // create format specifiers before the debug output
168  buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
169  "lower=%%%s upper=%%%s stride = %%%s "
170  "signed?<%s>, loc = %%s\n",
171  traits_t<T>::spec, traits_t<T>::spec,
172  traits_t<ST>::spec, traits_t<T>::spec);
173  check_loc(loc);
174  KD_TRACE(100,
175  (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
176  __kmp_str_free(&buff);
177  }
178 #endif
179  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
180 
181 #if OMPT_SUPPORT && OMPT_OPTIONAL
182  if (ompt_enabled.ompt_callback_work) {
183  ompt_callbacks.ompt_callback(ompt_callback_work)(
184  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
185  &(task_info->task_data), 0, codeptr);
186  }
187 #endif
188  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
189  return;
190  }
191 
192  // Although there are schedule enumerations above kmp_ord_upper that are
193  // not "distribute" schedules, the only useful ones there are dynamic, and
194  // those cannot appear here because this code path is executed only for
195  // static schedules.
196  if (schedtype > kmp_ord_upper) {
197  // we are in DISTRIBUTE construct
198  schedtype += kmp_sch_static -
199  kmp_distribute_static; // AC: convert to usual schedule type
200  if (th->th.th_team->t.t_serialized > 1) {
201  tid = 0;
202  team = th->th.th_team;
203  } else {
204  tid = th->th.th_team->t.t_master_tid;
205  team = th->th.th_team->t.t_parent;
206  }
207  } else {
208  tid = __kmp_tid_from_gtid(global_tid);
209  team = th->th.th_team;
210  }
211 
212  /* determine if "for" loop is an active worksharing construct */
213  if (team->t.t_serialized) {
214  /* serialized parallel, each thread executes whole iteration space */
215  if (plastiter != NULL)
216  *plastiter = TRUE;
217  /* leave pupper and plower set to entire iteration space */
218  *pstride =
219  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
220 
221 #ifdef KMP_DEBUG
222  {
223  char *buff;
224  // create format specifiers before the debug output
225  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
226  "lower=%%%s upper=%%%s stride = %%%s\n",
227  traits_t<T>::spec, traits_t<T>::spec,
228  traits_t<ST>::spec);
229  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
230  __kmp_str_free(&buff);
231  }
232 #endif
233  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
234 
235 #if OMPT_SUPPORT && OMPT_OPTIONAL
236  if (ompt_enabled.ompt_callback_work) {
237  ompt_callbacks.ompt_callback(ompt_callback_work)(
238  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
239  &(task_info->task_data), *pstride, codeptr);
240  }
241 #endif
242  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
243  return;
244  }
245  nth = team->t.t_nproc;
246  if (nth == 1) {
247  if (plastiter != NULL)
248  *plastiter = TRUE;
249  *pstride =
250  (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
251 #ifdef KMP_DEBUG
252  {
253  char *buff;
254  // create format specifiers before the debug output
255  buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
256  "lower=%%%s upper=%%%s stride = %%%s\n",
257  traits_t<T>::spec, traits_t<T>::spec,
258  traits_t<ST>::spec);
259  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
260  __kmp_str_free(&buff);
261  }
262 #endif
263  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
264 
265 #if OMPT_SUPPORT && OMPT_OPTIONAL
266  if (ompt_enabled.ompt_callback_work) {
267  ompt_callbacks.ompt_callback(ompt_callback_work)(
268  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
269  &(task_info->task_data), *pstride, codeptr);
270  }
271 #endif
272  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
273  return;
274  }
275 
276  /* compute trip count */
277  if (incr == 1) {
278  trip_count = *pupper - *plower + 1;
279  } else if (incr == -1) {
280  trip_count = *plower - *pupper + 1;
281  } else if (incr > 0) {
282  // upper-lower can exceed the limit of signed type
283  trip_count = (UT)(*pupper - *plower) / incr + 1;
284  } else {
285  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
286  }
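  // Worked example for the general case: lower=0, upper=9, incr=2 iterates
  // over 0,2,4,6,8 and gives trip_count = (UT)(9 - 0) / 2 + 1 = 5; the
  // unit-stride cases above are the same formula with the division folded away.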
287 
288 #if KMP_STATS_ENABLED
289  if (KMP_MASTER_GTID(gtid)) {
290  KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
291  }
292 #endif
293 
294  if (__kmp_env_consistency_check) {
295  /* tripcount overflow? */
296  if (trip_count == 0 && *pupper != *plower) {
297  __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
298  loc);
299  }
300  }
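  // The check above catches overflow of the UT trip count: e.g. for an
  // unsigned 32-bit loop with lower=0, upper=UINT_MAX, incr=1, the expression
  // upper - lower + 1 wraps to 0 even though the bounds differ, so the range
  // is reported as too large.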
301 
302  /* compute remaining parameters */
303  switch (schedtype) {
304  case kmp_sch_static: {
305  if (trip_count < nth) {
306  KMP_DEBUG_ASSERT(
307  __kmp_static == kmp_sch_static_greedy ||
308  __kmp_static ==
309  kmp_sch_static_balanced); // Unknown static scheduling type.
310  if (tid < trip_count) {
311  *pupper = *plower = *plower + tid * incr;
312  } else {
313  // set bounds so non-active threads execute no iterations
314  *plower = *pupper + (incr > 0 ? 1 : -1);
315  }
316  if (plastiter != NULL)
317  *plastiter = (tid == trip_count - 1);
318  } else {
319  if (__kmp_static == kmp_sch_static_balanced) {
320  UT small_chunk = trip_count / nth;
321  UT extras = trip_count % nth;
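  // The first 'extras' threads get small_chunk + 1 iterations and the rest
  // get small_chunk, e.g. trip_count=10, nth=4 -> threads 0,1 take 3
  // iterations each and threads 2,3 take 2 each.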
322  *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
323  *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
324  if (plastiter != NULL)
325  *plastiter = (tid == nth - 1);
326  } else {
327  T big_chunk_inc_count =
328  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
329  T old_upper = *pupper;
330 
331  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
332  // Unknown static scheduling type.
333 
334  *plower += tid * big_chunk_inc_count;
335  *pupper = *plower + big_chunk_inc_count - incr;
336  if (incr > 0) {
337  if (*pupper < *plower)
338  *pupper = traits_t<T>::max_value;
339  if (plastiter != NULL)
340  *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
341  if (*pupper > old_upper)
342  *pupper = old_upper; // tracker C73258
343  } else {
344  if (*pupper > *plower)
345  *pupper = traits_t<T>::min_value;
346  if (plastiter != NULL)
347  *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
348  if (*pupper < old_upper)
349  *pupper = old_upper; // tracker C73258
350  }
351  }
352  }
353  *pstride = trip_count;
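  // For unchunked static each thread receives at most one contiguous block,
  // so no per-chunk stride is needed; *pstride is simply set to the trip
  // count (note: in iterations, not in loop-variable units) and a conforming
  // caller has no further chunks to visit.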
354  break;
355  }
356  case kmp_sch_static_chunked: {
357  ST span;
358  UT nchunks;
359  if (chunk < 1)
360  chunk = 1;
361  else if ((UT)chunk > trip_count)
362  chunk = trip_count;
363  nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
364  span = chunk * incr;
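  // Only the first chunk of each thread is materialized here: in the common
  // case *pstride = span * nth, which is exactly the distance from one of a
  // thread's chunks to its next, so stepping the bounds by *pstride walks the
  // thread through its remaining chunks. When there are fewer chunks than
  // threads the stride is span * nchunks and the surplus threads receive an
  // empty range.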
365  if (nchunks < nth) {
366  *pstride = span * nchunks;
367  if (tid < nchunks) {
368  *plower = *plower + (span * tid);
369  *pupper = *plower + span - incr;
370  } else {
371  *plower = *pupper + (incr > 0 ? 1 : -1);
372  }
373  } else {
374  *pstride = span * nth;
375  *plower = *plower + (span * tid);
376  *pupper = *plower + span - incr;
377  }
378  if (plastiter != NULL)
379  *plastiter = (tid == (nchunks - 1) % nth);
380  break;
381  }
382  case kmp_sch_static_balanced_chunked: {
383  T old_upper = *pupper;
384  // round up to make sure the chunk is enough to cover all iterations
385  UT span = (trip_count + nth - 1) / nth;
386 
387  // perform chunk adjustment
388  chunk = (span + chunk - 1) & ~(chunk - 1);
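  // The masking above rounds span up to a multiple of chunk. The bit trick
  // (x + c - 1) & ~(c - 1) is only a correct round-up when c is a power of
  // two, which is presumably the case for the chunk passed with this schedule
  // (e.g. a simd width); the adjustment is exact only under that assumption.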
389 
390  span = chunk * incr;
391  *plower = *plower + (span * tid);
392  *pupper = *plower + span - incr;
393  if (incr > 0) {
394  if (*pupper > old_upper)
395  *pupper = old_upper;
396  } else if (*pupper < old_upper)
397  *pupper = old_upper;
398 
399  if (plastiter != NULL)
400  *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
401  break;
402  }
403  default:
404  KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
405  break;
406  }
407 
408 #if USE_ITT_BUILD
409  // Report loop metadata
410  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
411  __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
412  team->t.t_active_level == 1) {
413  kmp_uint64 cur_chunk = chunk;
414  check_loc(loc);
415  // Calculate chunk in case it was not specified; it is specified for
416  // kmp_sch_static_chunked
417  if (schedtype == kmp_sch_static) {
418  cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
419  }
420  // 0 - "static" schedule
421  __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
422  }
423 #endif
424 #ifdef KMP_DEBUG
425  {
426  char *buff;
427  // create format specifiers before the debug output
428  buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
429  "upper=%%%s stride = %%%s signed?<%s>\n",
430  traits_t<T>::spec, traits_t<T>::spec,
431  traits_t<ST>::spec, traits_t<T>::spec);
432  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
433  __kmp_str_free(&buff);
434  }
435 #endif
436  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
437 
438 #if OMPT_SUPPORT && OMPT_OPTIONAL
439  if (ompt_enabled.ompt_callback_work) {
440  ompt_callbacks.ompt_callback(ompt_callback_work)(
441  ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
442  &(task_info->task_data), trip_count, codeptr);
443  }
444  if (ompt_enabled.ompt_callback_dispatch) {
445  ompt_dispatch_t dispatch_type;
446  ompt_data_t instance = ompt_data_none;
447  ompt_dispatch_chunk_t dispatch_chunk;
448  if (ompt_work_type == ompt_work_sections) {
449  dispatch_type = ompt_dispatch_section;
450  instance.ptr = codeptr;
451  } else {
452  OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
453  dispatch_type = (ompt_work_type == ompt_work_distribute)
454  ? ompt_dispatch_distribute_chunk
455  : ompt_dispatch_ws_loop_chunk;
456  instance.ptr = &dispatch_chunk;
457  }
458  ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
459  &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
460  instance);
461  }
462 #endif
463 
464  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
465  return;
466 }
467 
468 template <typename T>
469 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
470  kmp_int32 schedule, kmp_int32 *plastiter,
471  T *plower, T *pupper, T *pupperDist,
472  typename traits_t<T>::signed_t *pstride,
473  typename traits_t<T>::signed_t incr,
474  typename traits_t<T>::signed_t chunk
475 #if OMPT_SUPPORT && OMPT_OPTIONAL
476  ,
477  void *codeptr
478 #endif
479 ) {
480  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
481  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
482  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
483  typedef typename traits_t<T>::unsigned_t UT;
484  typedef typename traits_t<T>::signed_t ST;
485  kmp_uint32 tid;
486  kmp_uint32 nth;
487  kmp_uint32 team_id;
488  kmp_uint32 nteams;
489  UT trip_count;
490  kmp_team_t *team;
491  kmp_info_t *th;
492 
493  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
494  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
495  __kmp_assert_valid_gtid(gtid);
496 #ifdef KMP_DEBUG
497  {
498  char *buff;
499  // create format specifiers before the debug output
500  buff = __kmp_str_format(
501  "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
502  "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
503  traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
504  traits_t<ST>::spec, traits_t<T>::spec);
505  KD_TRACE(100,
506  (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
507  __kmp_str_free(&buff);
508  }
509 #endif
510 
511  if (__kmp_env_consistency_check) {
512  __kmp_push_workshare(gtid, ct_pdo, loc);
513  if (incr == 0) {
514  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
515  loc);
516  }
517  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
518  // The loop is illegal.
519  // Some zero-trip loops are maintained by the compiler, e.g.:
520  // for(i=10;i<0;++i) // lower >= upper - run-time check
521  // for(i=0;i>10;--i) // lower <= upper - run-time check
522  // for(i=0;i>10;++i) // incr > 0 - compile-time check
523  // for(i=10;i<0;--i) // incr < 0 - compile-time check
524  // Compiler does not check the following illegal loops:
525  // for(i=0;i<10;i+=incr) // where incr<0
526  // for(i=10;i>0;i-=incr) // where incr<0
527  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
528  }
529  }
530  tid = __kmp_tid_from_gtid(gtid);
531  th = __kmp_threads[gtid];
532  nth = th->th.th_team_nproc;
533  team = th->th.th_team;
534  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
535  nteams = th->th.th_teams_size.nteams;
536  team_id = team->t.t_master_tid;
537  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
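  // Two-level split: the full iteration space is first divided among the
  // teams of the league (the distribute part), producing [*plower,
  // *pupperDist] for this team, and that team chunk is then divided among the
  // team's nth threads (the worksharing part), producing [*plower, *pupper]
  // for this thread.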
538 
539  // compute global trip count
540  if (incr == 1) {
541  trip_count = *pupper - *plower + 1;
542  } else if (incr == -1) {
543  trip_count = *plower - *pupper + 1;
544  } else if (incr > 0) {
545  // upper-lower can exceed the limit of signed type
546  trip_count = (UT)(*pupper - *plower) / incr + 1;
547  } else {
548  trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
549  }
550 
551  *pstride = *pupper - *plower; // just in case (can be unused)
552  if (trip_count <= nteams) {
553  KMP_DEBUG_ASSERT(
554  __kmp_static == kmp_sch_static_greedy ||
555  __kmp_static ==
556  kmp_sch_static_balanced); // Unknown static scheduling type.
557  // only the primary threads of some teams get a single iteration; other
558  // threads get nothing
559  if (team_id < trip_count && tid == 0) {
560  *pupper = *pupperDist = *plower = *plower + team_id * incr;
561  } else {
562  *pupperDist = *pupper;
563  *plower = *pupper + incr; // compiler should skip loop body
564  }
565  if (plastiter != NULL)
566  *plastiter = (tid == 0 && team_id == trip_count - 1);
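  // e.g. trip_count=3, nteams=5: the primary threads of teams 0..2 each
  // execute exactly one iteration (lower, lower+incr, lower+2*incr), every
  // other thread gets an empty range, and only team 2's primary thread sees
  // *plastiter set.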
567  } else {
568  // Get the team's chunk first (each team gets at most one chunk)
569  if (__kmp_static == kmp_sch_static_balanced) {
570  UT chunkD = trip_count / nteams;
571  UT extras = trip_count % nteams;
572  *plower +=
573  incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
574  *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
575  if (plastiter != NULL)
576  *plastiter = (team_id == nteams - 1);
577  } else {
578  T chunk_inc_count =
579  (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
580  T upper = *pupper;
581  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
582  // Unknown static scheduling type.
583  *plower += team_id * chunk_inc_count;
584  *pupperDist = *plower + chunk_inc_count - incr;
585  // Check/correct bounds if needed
586  if (incr > 0) {
587  if (*pupperDist < *plower)
588  *pupperDist = traits_t<T>::max_value;
589  if (plastiter != NULL)
590  *plastiter = *plower <= upper && *pupperDist > upper - incr;
591  if (*pupperDist > upper)
592  *pupperDist = upper; // tracker C73258
593  if (*plower > *pupperDist) {
594  *pupper = *pupperDist; // no iterations available for the team
595  goto end;
596  }
597  } else {
598  if (*pupperDist > *plower)
599  *pupperDist = traits_t<T>::min_value;
600  if (plastiter != NULL)
601  *plastiter = *plower >= upper && *pupperDist < upper - incr;
602  if (*pupperDist < upper)
603  *pupperDist = upper; // tracker C73258
604  if (*plower < *pupperDist) {
605  *pupper = *pupperDist; // no iterations available for the team
606  goto end;
607  }
608  }
609  }
610  // Get the parallel loop chunk now (for thread)
611  // compute trip count for team's chunk
612  if (incr == 1) {
613  trip_count = *pupperDist - *plower + 1;
614  } else if (incr == -1) {
615  trip_count = *plower - *pupperDist + 1;
616  } else if (incr > 1) {
617  // upper-lower can exceed the limit of signed type
618  trip_count = (UT)(*pupperDist - *plower) / incr + 1;
619  } else {
620  trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
621  }
622  KMP_DEBUG_ASSERT(trip_count);
623  switch (schedule) {
624  case kmp_sch_static: {
625  if (trip_count <= nth) {
626  KMP_DEBUG_ASSERT(
627  __kmp_static == kmp_sch_static_greedy ||
628  __kmp_static ==
629  kmp_sch_static_balanced); // Unknown static scheduling type.
630  if (tid < trip_count)
631  *pupper = *plower = *plower + tid * incr;
632  else
633  *plower = *pupper + incr; // no iterations available
634  if (plastiter != NULL)
635  if (*plastiter != 0 && !(tid == trip_count - 1))
636  *plastiter = 0;
637  } else {
638  if (__kmp_static == kmp_sch_static_balanced) {
639  UT chunkL = trip_count / nth;
640  UT extras = trip_count % nth;
641  *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
642  *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
643  if (plastiter != NULL)
644  if (*plastiter != 0 && !(tid == nth - 1))
645  *plastiter = 0;
646  } else {
647  T chunk_inc_count =
648  (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
649  T upper = *pupperDist;
650  KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
651  // Unknown static scheduling type.
652  *plower += tid * chunk_inc_count;
653  *pupper = *plower + chunk_inc_count - incr;
654  if (incr > 0) {
655  if (*pupper < *plower)
656  *pupper = traits_t<T>::max_value;
657  if (plastiter != NULL)
658  if (*plastiter != 0 &&
659  !(*plower <= upper && *pupper > upper - incr))
660  *plastiter = 0;
661  if (*pupper > upper)
662  *pupper = upper; // tracker C73258
663  } else {
664  if (*pupper > *plower)
665  *pupper = traits_t<T>::min_value;
666  if (plastiter != NULL)
667  if (*plastiter != 0 &&
668  !(*plower >= upper && *pupper < upper - incr))
669  *plastiter = 0;
670  if (*pupper < upper)
671  *pupper = upper; // tracker C73258
672  }
673  }
674  }
675  break;
676  }
677  case kmp_sch_static_chunked: {
678  ST span;
679  if (chunk < 1)
680  chunk = 1;
681  span = chunk * incr;
682  *pstride = span * nth;
683  *plower = *plower + (span * tid);
684  *pupper = *plower + span - incr;
685  if (plastiter != NULL)
686  if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
687  *plastiter = 0;
688  break;
689  }
690  default:
691  KMP_ASSERT2(0,
692  "__kmpc_dist_for_static_init: unknown loop scheduling type");
693  break;
694  }
695  }
696 end:;
697 #ifdef KMP_DEBUG
698  {
699  char *buff;
700  // create format specifiers before the debug output
701  buff = __kmp_str_format(
702  "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
703  "stride=%%%s signed?<%s>\n",
704  traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
705  traits_t<ST>::spec, traits_t<T>::spec);
706  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
707  __kmp_str_free(&buff);
708  }
709 #endif
710  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
711 #if OMPT_SUPPORT && OMPT_OPTIONAL
712  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
713  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
714  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
715  if (ompt_enabled.ompt_callback_work) {
716  ompt_callbacks.ompt_callback(ompt_callback_work)(
717  ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
718  &(task_info->task_data), 0, codeptr);
719  }
720  if (ompt_enabled.ompt_callback_dispatch) {
721  ompt_data_t instance = ompt_data_none;
722  ompt_dispatch_chunk_t dispatch_chunk;
723  OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
724  instance.ptr = &dispatch_chunk;
725  ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
726  &(team_info->parallel_data), &(task_info->task_data),
727  ompt_dispatch_distribute_chunk, instance);
728  }
729  }
730 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
731  KMP_STATS_LOOP_END(OMP_distribute_iterations);
732  return;
733 }
734 
735 template <typename T>
736 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
737  kmp_int32 *p_last, T *p_lb, T *p_ub,
738  typename traits_t<T>::signed_t *p_st,
739  typename traits_t<T>::signed_t incr,
740  typename traits_t<T>::signed_t chunk) {
741  // The routine returns the first chunk distributed to the team and the
742  // stride for computing subsequent chunks.
743  // The last-iteration flag is set for the team that will execute
744  // the last iteration of the loop.
745  // The routine is called for dist_schedule(static, chunk) only.
746  typedef typename traits_t<T>::unsigned_t UT;
747  typedef typename traits_t<T>::signed_t ST;
748  kmp_uint32 team_id;
749  kmp_uint32 nteams;
750  UT trip_count;
751  T lower;
752  T upper;
753  ST span;
754  kmp_team_t *team;
755  kmp_info_t *th;
756 
757  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
758  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
759  __kmp_assert_valid_gtid(gtid);
760 #ifdef KMP_DEBUG
761  {
762  char *buff;
763  // create format specifiers before the debug output
764  buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
765  "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
766  traits_t<T>::spec, traits_t<T>::spec,
767  traits_t<ST>::spec, traits_t<ST>::spec,
768  traits_t<T>::spec);
769  KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
770  __kmp_str_free(&buff);
771  }
772 #endif
773 
774  lower = *p_lb;
775  upper = *p_ub;
776  if (__kmp_env_consistency_check) {
777  if (incr == 0) {
778  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
779  loc);
780  }
781  if (incr > 0 ? (upper < lower) : (lower < upper)) {
782  // The loop is illegal.
783  // Some zero-trip loops are maintained by the compiler, e.g.:
784  // for(i=10;i<0;++i) // lower >= upper - run-time check
785  // for(i=0;i>10;--i) // lower <= upper - run-time check
786  // for(i=0;i>10;++i) // incr > 0 - compile-time check
787  // for(i=10;i<0;--i) // incr < 0 - compile-time check
788  // Compiler does not check the following illegal loops:
789  // for(i=0;i<10;i+=incr) // where incr<0
790  // for(i=10;i>0;i-=incr) // where incr<0
791  __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
792  }
793  }
794  th = __kmp_threads[gtid];
795  team = th->th.th_team;
796  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
797  nteams = th->th.th_teams_size.nteams;
798  team_id = team->t.t_master_tid;
799  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
800 
801  // compute trip count
802  if (incr == 1) {
803  trip_count = upper - lower + 1;
804  } else if (incr == -1) {
805  trip_count = lower - upper + 1;
806  } else if (incr > 0) {
807  // upper-lower can exceed the limit of signed type
808  trip_count = (UT)(upper - lower) / incr + 1;
809  } else {
810  trip_count = (UT)(lower - upper) / (-incr) + 1;
811  }
812  if (chunk < 1)
813  chunk = 1;
814  span = chunk * incr;
815  *p_st = span * nteams;
816  *p_lb = lower + (span * team_id);
817  *p_ub = *p_lb + span - incr;
818  if (p_last != NULL)
819  *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
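  // Example: lower=0, upper=99, incr=1, chunk=10, nteams=4 gives span=10 and
  // *p_st=40; team k starts at [10k, 10k+9] and strides by 40 for its later
  // chunks. The last chunk covers 90..99, i.e. chunk index 9, so
  // ((trip_count - 1) / chunk) % nteams = 9 % 4 = 1 marks team 1 as last.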
820  // Correct upper bound if needed
821  if (incr > 0) {
822  if (*p_ub < *p_lb) // overflow?
823  *p_ub = traits_t<T>::max_value;
824  if (*p_ub > upper)
825  *p_ub = upper; // tracker C73258
826  } else { // incr < 0
827  if (*p_ub > *p_lb)
828  *p_ub = traits_t<T>::min_value;
829  if (*p_ub < upper)
830  *p_ub = upper; // tracker C73258
831  }
832 #ifdef KMP_DEBUG
833  {
834  char *buff;
835  // create format specifiers before the debug output
836  buff =
837  __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
838  "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
839  traits_t<T>::spec, traits_t<T>::spec,
840  traits_t<ST>::spec, traits_t<ST>::spec);
841  KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
842  __kmp_str_free(&buff);
843  }
844 #endif
845 }
846 
847 //------------------------------------------------------------------------------
848 extern "C" {
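// Exported entry points. Each __kmpc_* wrapper below simply instantiates the
// corresponding template for one induction-variable width: _4/_4u for
// signed/unsigned 32-bit and _8/_8u for signed/unsigned 64-bit loop counters.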
870 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
871  kmp_int32 *plastiter, kmp_int32 *plower,
872  kmp_int32 *pupper, kmp_int32 *pstride,
873  kmp_int32 incr, kmp_int32 chunk) {
874  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
875  pupper, pstride, incr, chunk
876 #if OMPT_SUPPORT && OMPT_OPTIONAL
877  ,
878  OMPT_GET_RETURN_ADDRESS(0)
879 #endif
880  );
881 }
882 
886 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
887  kmp_int32 schedtype, kmp_int32 *plastiter,
888  kmp_uint32 *plower, kmp_uint32 *pupper,
889  kmp_int32 *pstride, kmp_int32 incr,
890  kmp_int32 chunk) {
891  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
892  pupper, pstride, incr, chunk
893 #if OMPT_SUPPORT && OMPT_OPTIONAL
894  ,
895  OMPT_GET_RETURN_ADDRESS(0)
896 #endif
897  );
898 }
899 
903 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
904  kmp_int32 *plastiter, kmp_int64 *plower,
905  kmp_int64 *pupper, kmp_int64 *pstride,
906  kmp_int64 incr, kmp_int64 chunk) {
907  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
908  pupper, pstride, incr, chunk
909 #if OMPT_SUPPORT && OMPT_OPTIONAL
910  ,
911  OMPT_GET_RETURN_ADDRESS(0)
912 #endif
913  );
914 }
915 
919 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
920  kmp_int32 schedtype, kmp_int32 *plastiter,
921  kmp_uint64 *plower, kmp_uint64 *pupper,
922  kmp_int64 *pstride, kmp_int64 incr,
923  kmp_int64 chunk) {
924  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
925  pupper, pstride, incr, chunk
926 #if OMPT_SUPPORT && OMPT_OPTIONAL
927  ,
928  OMPT_GET_RETURN_ADDRESS(0)
929 #endif
930  );
931 }
936 #if OMPT_SUPPORT && OMPT_OPTIONAL
937 #define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
938 #else
939 #define OMPT_CODEPTR_ARG
940 #endif
941 
964 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
965  kmp_int32 schedule, kmp_int32 *plastiter,
966  kmp_int32 *plower, kmp_int32 *pupper,
967  kmp_int32 *pupperD, kmp_int32 *pstride,
968  kmp_int32 incr, kmp_int32 chunk) {
969  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
970  pupper, pupperD, pstride, incr,
971  chunk OMPT_CODEPTR_ARG);
972 }
973 
977 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
978  kmp_int32 schedule, kmp_int32 *plastiter,
979  kmp_uint32 *plower, kmp_uint32 *pupper,
980  kmp_uint32 *pupperD, kmp_int32 *pstride,
981  kmp_int32 incr, kmp_int32 chunk) {
982  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
983  pupper, pupperD, pstride, incr,
984  chunk OMPT_CODEPTR_ARG);
985 }
986 
990 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
991  kmp_int32 schedule, kmp_int32 *plastiter,
992  kmp_int64 *plower, kmp_int64 *pupper,
993  kmp_int64 *pupperD, kmp_int64 *pstride,
994  kmp_int64 incr, kmp_int64 chunk) {
995  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
996  pupper, pupperD, pstride, incr,
997  chunk OMPT_CODEPTR_ARG);
998 }
999 
1003 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
1004  kmp_int32 schedule, kmp_int32 *plastiter,
1005  kmp_uint64 *plower, kmp_uint64 *pupper,
1006  kmp_uint64 *pupperD, kmp_int64 *pstride,
1007  kmp_int64 incr, kmp_int64 chunk) {
1008  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
1009  pupper, pupperD, pstride, incr,
1010  chunk OMPT_CODEPTR_ARG);
1011 }
1016 //------------------------------------------------------------------------------
1017 // Auxiliary routines for Distribute Parallel Loop construct implementation
1018 // Transfer call to template< type T >
1019 // __kmp_team_static_init( ident_t *loc, int gtid,
1020 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
1021 
1042 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1043  kmp_int32 *p_lb, kmp_int32 *p_ub,
1044  kmp_int32 *p_st, kmp_int32 incr,
1045  kmp_int32 chunk) {
1046  KMP_DEBUG_ASSERT(__kmp_init_serial);
1047  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1048  chunk);
1049 }
1050 
1054 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1055  kmp_uint32 *p_lb, kmp_uint32 *p_ub,
1056  kmp_int32 *p_st, kmp_int32 incr,
1057  kmp_int32 chunk) {
1058  KMP_DEBUG_ASSERT(__kmp_init_serial);
1059  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1060  chunk);
1061 }
1062 
1066 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1067  kmp_int64 *p_lb, kmp_int64 *p_ub,
1068  kmp_int64 *p_st, kmp_int64 incr,
1069  kmp_int64 chunk) {
1070  KMP_DEBUG_ASSERT(__kmp_init_serial);
1071  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1072  chunk);
1073 }
1074 
1078 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1079  kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1080  kmp_int64 *p_st, kmp_int64 incr,
1081  kmp_int64 chunk) {
1082  KMP_DEBUG_ASSERT(__kmp_init_serial);
1083  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1084  chunk);
1085 }
1090 } // extern "C"