LLVM OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Modules Pages
kmp_sched.cpp
1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16  it may change values between parallel regions. __kmp_max_nth
17  is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// These define the printf length/conversion specifier used when the KD_TRACE
// debug output below builds format strings for each iterate type T.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40 
/* Compute the sub-range of a statically scheduled worksharing loop that the
   calling thread must execute.

   On entry *plower/*pupper describe the whole iteration space; on return they
   describe this thread's portion and *pstride the distance between successive
   chunks.  *plastiter (if non-NULL) is set TRUE iff this thread executes the
   last iteration of the loop.

   loc        -- source location info (diagnostics, OMPT, ITT metadata)
   global_tid -- global thread id of the calling thread
   schedtype  -- static schedule kind; values above kmp_ord_upper indicate a
                 DISTRIBUTE construct and are converted below
   incr       -- loop increment, must be non-zero
   chunk      -- chunk size for chunked schedules (values < 1 become 1)
   codeptr    -- (OMPT only) code address reported to the work callback */
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot guard so the "outdated workshare" warning is issued at most once
  // per process (flipped atomically below).
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags set by the compiler.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // None of the work-type flags present: warn once that the compiler
        // produced an outdated workshare ident.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Partition among the team masters: use the parent team's arity/ids.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Team of one: this thread gets the entire iteration space.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: threads tid < trip_count get exactly
      // one iteration each, the rest get an empty range (lower > upper).
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: each thread gets trip_count/nth iterations, the first
        // (trip_count%nth) threads get one extra.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: every thread gets ceil(trip_count/nth) iterations; the last
        // thread's upper bound is clamped back to the original upper bound.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value; // arithmetic wrapped: saturate
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value; // arithmetic wrapped: saturate
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth; // distance between this thread's successive chunks
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): rounds span up to a multiple of chunk; assumes chunk is a
    // power of two for the mask arithmetic to be exact -- confirm with caller.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

#if KMP_STATS_ENABLED
  {
    // Recompute this thread's trip count from the final bounds for stats.
    kmp_int64 t;
    kmp_int64 u = (kmp_int64)(*pupper);
    kmp_int64 l = (kmp_int64)(*plower);
    kmp_int64 i = (kmp_int64)incr;
    /* compute trip count */
    if (i == 1) {
      t = u - l + 1;
    } else if (i == -1) {
      t = l - u + 1;
    } else if (i > 0) {
      t = (u - l) / i + 1;
    } else {
      t = (l - u) / (-i) + 1;
    }
    KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
    KMP_POP_PARTITIONED_TIMER();
  }
#endif
  return;
}
413 
/* Compute loop bounds for a combined distribute-parallel-for construct with
   static scheduling.

   Two partitions are produced: first the global iteration space is divided
   among the teams (the team's range is returned in *plower..*pupperDist),
   then the team's range is divided among the threads of this team per the
   requested schedule (*plower..*pupper).  *plastiter (if non-NULL) is set
   iff this thread executes the last iteration of the global loop.

   loc      -- source location (diagnostics)
   gtid     -- global thread id
   schedule -- per-thread schedule: kmp_sch_static or kmp_sch_static_chunked
   incr     -- loop increment, must be non-zero
   chunk    -- chunk size for the chunked schedule (values < 1 become 1) */
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: teams get trip_count/nteams iterations, the first
      // (trip_count%nteams) teams get one extra.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: each team gets ceil(trip_count/nteams) iterations, clamped
      // to the original bounds; overflow saturates before clamping.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Only clear the last-iteration flag here; it may already have been
        // set to 0 by the team-level partitioning above.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}
653 
/* Compute the first chunk of iterations distributed to this team and the
   stride between successive team chunks.

   Used to implement dist_schedule(static, chunk) on Distribute Parallel Loop
   constructs: each team starts at its team_id-th chunk and advances by
   *p_st = chunk_span * nteams.  *p_last (if non-NULL) is set for the team
   that will execute the last iteration of the loop.

   loc   -- source location (diagnostics)
   gtid  -- global thread id
   p_lb/p_ub -- in: global loop bounds; out: this team's first chunk
   p_st  -- out: stride to the team's next chunk
   incr  -- loop increment, must be non-zero
   chunk -- chunk size (values < 1 become 1) */
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedue(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams; // stride from one team chunk to the team's next
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
766 
767 //------------------------------------------------------------------------------
768 extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type (static schedule kind)
@param plastiter Pointer to the "last iteration" flag (out)
@param plower Pointer to the lower bound (in: whole space, out: this thread's)
@param pupper Pointer to the upper bound (in/out)
@param pstride Pointer to the stride (out)
@param incr Loop increment
@param chunk The chunk size

Compute static-schedule loop bounds for signed 32-bit iterates; forwards to
the type-templated __kmp_for_static_init.
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
802 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_for_static_init_4

Compute static-schedule loop bounds for unsigned 32-bit iterates; forwards to
the type-templated __kmp_for_static_init.
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
819 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_for_static_init_4

Compute static-schedule loop bounds for signed 64-bit iterates; forwards to
the type-templated __kmp_for_static_init.
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
835 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_for_static_init_4

Compute static-schedule loop bounds for unsigned 64-bit iterates; forwards to
the type-templated __kmp_for_static_init.
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Per-thread scheduling type
@param plastiter Pointer to the "last iteration" flag (out)
@param plower Pointer to the lower bound (in/out)
@param pupper Pointer to the thread's upper bound (in/out)
@param pupperD Pointer to the team's ("distribute") upper bound (out)
@param pstride Pointer to the stride (out)
@param incr Loop increment
@param chunk The chunk size

Compute distribute-parallel-for bounds for signed 32-bit iterates; forwards
to the type-templated __kmp_dist_for_static_init.
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
886 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_dist_for_static_init_4

Compute distribute-parallel-for bounds for unsigned 32-bit iterates; forwards
to the type-templated __kmp_dist_for_static_init.
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
898 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_dist_for_static_init_4

Compute distribute-parallel-for bounds for signed 64-bit iterates; forwards
to the type-templated __kmp_dist_for_static_init.
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
910 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_dist_for_static_init_4

Compute distribute-parallel-for bounds for unsigned 64-bit iterates; forwards
to the type-templated __kmp_dist_for_static_init.
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
926 //------------------------------------------------------------------------------
927 // Auxiliary routines for Distribute Parallel Loop construct implementation
928 // Transfer call to template< type T >
929 // __kmp_team_static_init( ident_t *loc, int gtid,
930 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
931 
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param p_last Pointer to the "last iteration" flag (out)
@param p_lb Pointer to the lower bound (in: whole space, out: team's chunk)
@param p_ub Pointer to the upper bound (in/out)
@param p_st Pointer to the stride between team chunks (out)
@param incr Loop increment
@param chunk The chunk size

Return the first team chunk for signed 32-bit iterates; forwards to the
type-templated __kmp_team_static_init.
*/
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
960 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_team_static_init_4

Return the first team chunk for unsigned 32-bit iterates; forwards to the
type-templated __kmp_team_static_init.
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
972 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_team_static_init_4

Return the first team chunk for signed 64-bit iterates; forwards to the
type-templated __kmp_team_static_init.
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
984 
/*!
@ingroup WORK_SHARING
See @ref __kmpc_team_static_init_4

Return the first team chunk for unsigned 64-bit iterates; forwards to the
type-templated __kmp_team_static_init.
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
1000 } // extern "C"
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:976
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:890
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
Definition: kmp_stats.h:876
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:952
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:823
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:889
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:964
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:806
Definition: kmp.h:223
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:878
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:988
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:914
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:790
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:902
char const * psource
Definition: kmp.h:233
kmp_int32 flags
Definition: kmp.h:225
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:839