16 #include "kmp_stats.h" 17 #include "kmp_wait_release.h" 18 #include "kmp_taskdeps.h" 21 #include "ompt-specific.h" 24 #include "tsan_annotations.h" 27 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
28 kmp_info_t *this_thr);
29 static void __kmp_alloc_task_deque(kmp_info_t *thread,
30 kmp_thread_data_t *thread_data);
31 static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
32 kmp_task_team_t *task_team);
33 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up stack_top if we need to pop from the previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping for this entry
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack when thread_data is created.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack onwards
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first one
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: push a tied task onto the task stack, growing the
// stack by another block if necessary.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    // Don't push anything on stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: pop the tied task from the task stack; the task popped
// must match the task that is ending.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from the previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // stack was built correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif /* BUILD_TIED_TASK_STACK */
//  __kmp_task_is_allowed: Is the new task allowed to execute right now?
//  Checks the Task Scheduling Constraint for tied tasks (if requested) and
//  tries to acquire any mutexinoutset locks the task depends on.
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it is in turn a descendant of all others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the last tied task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (node && (node->dn.mtx_num_locks > 0)) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previously acquired locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks were acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
// __kmp_realloc_task_deque: re-size a thread's task deque (doubling it),
// copying the content from the old deque and adjusting the related fields.
// Must be called with the deque_lock held.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
//  __kmp_push_task: Add a task to the thread's deque
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from a thread outside OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but the calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
// __kmp_pop_current_task_from_thread: restore the current task when a team ends
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}

// __kmp_push_current_task_to_thread: set up current task for a new team
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // The thread's current task becomes the parent of the new team's implicit
  // tasks.
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
// __kmp_task_start: bookkeeping for a task starting execution
//
// gtid: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark the currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as the current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
}
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
563 ompt_task_status_t status = ompt_task_switch;
564 if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
565 status = ompt_task_yield;
566 __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
569 if (ompt_enabled.ompt_callback_task_schedule) {
570 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
571 &(current_task->ompt_task_info.task_data), status,
572 &(taskdata->ompt_task_info.task_data));
574 taskdata->ompt_task_info.scheduling_parent = current_task;
579 static inline void __ompt_task_finish(kmp_task_t *task,
580 kmp_taskdata_t *resumed_task,
581 ompt_task_status_t status) {
582 if (ompt_enabled.ompt_callback_task_schedule) {
583 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
584 if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
585 taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
586 status = ompt_task_cancel;
590 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
591 &(taskdata->ompt_task_info.task_data), status,
592 (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
602 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
603 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));
609 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
612 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
613 KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);
625 if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
626 current_task->ompt_task_info.frame.enter_frame.ptr =
627 taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
628 current_task->ompt_task_info.frame.enter_frame_flags =
629 taskdata->ompt_task_info.frame.exit_frame_flags = ompt_frame_application | ompt_frame_framepointer;
631 if (ompt_enabled.ompt_callback_task_create) {
632 ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
633 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
634 &(parent_info->task_data), &(parent_info->frame),
635 &(taskdata->ompt_task_info.task_data),
636 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
639 __ompt_task_start(task, current_task, gtid);
#endif // OMPT_SUPPORT
  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}
#if OMPT_SUPPORT
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
667 if (UNLIKELY(ompt_enabled.enabled)) {
668 OMPT_STORE_RETURN_ADDRESS(gtid);
669 __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
670 OMPT_GET_FRAME_ADDRESS(1),
671 OMPT_LOAD_RETURN_ADDRESS(gtid));
675 __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
681 void __kmpc_omp_task_begin(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
682 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
686 (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
687 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
689 __kmp_task_start(gtid, task, current_task);
691 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
692 loc_ref, KMP_TASK_TO_TASKDATA(task)));
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));
708 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
709 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
710 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
711 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
712 KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
713 taskdata->td_flags.task_serial == 1);
714 KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
716 taskdata->td_flags.freed = 1;
717 ANNOTATE_HAPPENS_BEFORE(taskdata);
720 __kmp_fast_free(thread, taskdata);
722 __kmp_thread_free(thread, taskdata);
725 KA_TRACE(20, (
"__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
734 static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
735 kmp_taskdata_t *taskdata,
736 kmp_info_t *thread) {
739 kmp_int32 team_serial =
740 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
741 !taskdata->td_flags.proxy;
742 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
744 kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
745 KMP_DEBUG_ASSERT(children >= 0);
748 while (children == 0) {
749 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
751 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete " 752 "and freeing itself\n",
756 __kmp_free_task(gtid, taskdata, thread);
758 taskdata = parent_taskdata;
764 if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
765 if (taskdata->td_dephash) {
766 int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
767 kmp_tasking_flags_t flags_old = taskdata->td_flags;
768 if (children == 0 && flags_old.complete == 1) {
769 kmp_tasking_flags_t flags_new = flags_old;
770 flags_new.complete = 0;
771 if (KMP_COMPARE_AND_STORE_ACQ32(
772 RCAST(kmp_int32 *, &taskdata->td_flags),
773 *RCAST(kmp_int32 *, &flags_old),
774 *RCAST(kmp_int32 *, &flags_new))) {
775 KA_TRACE(100, (
"__kmp_free_task_and_ancestors: T#%d cleans " 776 "dephash of implicit task %p\n",
779 __kmp_dephash_free_entries(thread, taskdata->td_dephash);
786 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
787 KMP_DEBUG_ASSERT(children >= 0);
791 20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " 792 "not freeing it yet\n",
793 gtid, taskdata, children));
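// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed (may be NULL if the task is serialized)
//
// The template argument (see the __kmp_task_finish<true/false> callers below)
// selects whether OMPT task-schedule callbacks are emitted on completion,
// detach, or cancellation.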
806 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
807 kmp_taskdata_t *resumed_task) {
808 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
809 kmp_info_t *thread = __kmp_threads[gtid];
810 kmp_task_team_t *task_team =
811 thread->th.th_task_team;
812 kmp_int32 children = 0;
814 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming " 816 gtid, taskdata, resumed_task));
818 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
821 #ifdef BUILD_TIED_TASK_STACK 822 if (taskdata->td_flags.tiedness == TASK_TIED) {
823 __kmp_pop_task_stack(gtid, thread, taskdata);
827 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
830 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
833 (
"__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
834 gtid, counter, taskdata));
838 if (resumed_task == NULL) {
839 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
840 resumed_task = taskdata->td_parent;
843 thread->th.th_current_task = resumed_task;
844 resumed_task->td_flags.executing = 1;
845 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d partially done task %p, " 846 "resuming task %p\n",
847 gtid, taskdata, resumed_task));
853 kmp_depnode_t *node = taskdata->td_depnode;
854 if (node && (node->dn.mtx_num_locks < 0)) {
856 node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
857 for (
int i = node->dn.mtx_num_locks - 1; i >= 0; --i) {
858 KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
859 __kmp_release_lock(node->dn.mtx_locks[i], gtid);
866 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
867 taskdata->td_flags.task_serial);
868 if (taskdata->td_flags.task_serial) {
869 if (resumed_task == NULL) {
870 resumed_task = taskdata->td_parent;
874 KMP_DEBUG_ASSERT(resumed_task !=
884 if (taskdata->td_flags.destructors_thunk) {
885 kmp_routine_entry_t destr_thunk = task->data1.destructors;
886 KMP_ASSERT(destr_thunk);
887 destr_thunk(gtid, task);
890 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
891 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
892 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
895 if (taskdata->td_flags.detachable == TASK_DETACHABLE) {
896 if (taskdata->td_allow_completion_event.type ==
897 KMP_EVENT_ALLOW_COMPLETION) {
899 __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
900 if (taskdata->td_allow_completion_event.type ==
901 KMP_EVENT_ALLOW_COMPLETION) {
903 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
904 taskdata->td_flags.executing = 0;
911 __ompt_task_finish(task, resumed_task, ompt_task_detach);
917 taskdata->td_flags.proxy = TASK_PROXY;
920 __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
925 taskdata->td_flags.complete = 1;
930 __ompt_task_finish(task, resumed_task, ompt_task_complete);
935 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
936 taskdata->td_flags.detachable == TASK_DETACHABLE) {
939 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
940 KMP_DEBUG_ASSERT(children >= 0);
941 if (taskdata->td_taskgroup)
942 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
943 __kmp_release_deps(gtid, taskdata);
  } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
947 __kmp_release_deps(gtid, taskdata);
953 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
954 taskdata->td_flags.executing = 0;
959 20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
960 gtid, taskdata, children));
966 thread->th.th_current_task = resumed_task;
968 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
972 resumed_task->td_flags.executing = 1;
975 10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
976 gtid, taskdata, resumed_task));
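// __kmpc_omp_task_complete_if0: report that an if(0) (serialized) task has
// completed execution; a thin wrapper around __kmp_task_finish. The OMPT
// variant additionally resets the enclosing task's enter_frame.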
982 static void __kmpc_omp_task_complete_if0_template(
ident_t *loc_ref,
985 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
986 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
987 __kmp_assert_valid_gtid(gtid);
989 __kmp_task_finish<ompt>(gtid, task, NULL);
991 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
992 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
996 ompt_frame_t *ompt_frame;
997 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
998 ompt_frame->enter_frame = ompt_data_none;
999 ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
1008 void __kmpc_omp_task_complete_if0_ompt(
ident_t *loc_ref, kmp_int32 gtid,
1010 __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
1012 #endif // OMPT_SUPPORT 1019 void __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid,
1022 if (UNLIKELY(ompt_enabled.enabled)) {
1023 __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
1027 __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
1033 void __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid,
1035 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
1036 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1038 __kmp_task_finish<false>(gtid, task,
1041 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
1042 loc_ref, KMP_TASK_TO_TASKDATA(task)));
1045 #endif // TASK_UNUSED 1058 void __kmp_init_implicit_task(
ident_t *loc_ref, kmp_info_t *this_thr,
1059 kmp_team_t *team,
int tid,
int set_curr_task) {
1060 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1064 (
"__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
1065 tid, team, task, set_curr_task ?
"TRUE" :
"FALSE"));
1067 task->td_task_id = KMP_GEN_TASK_ID();
1068 task->td_team = team;
1071 task->td_ident = loc_ref;
1072 task->td_taskwait_ident = NULL;
1073 task->td_taskwait_counter = 0;
1074 task->td_taskwait_thread = 0;
1076 task->td_flags.tiedness = TASK_TIED;
1077 task->td_flags.tasktype = TASK_IMPLICIT;
1078 task->td_flags.proxy = TASK_FULL;
1081 task->td_flags.task_serial = 1;
1082 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1083 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1085 task->td_flags.started = 1;
1086 task->td_flags.executing = 1;
1087 task->td_flags.complete = 0;
1088 task->td_flags.freed = 0;
1090 task->td_depnode = NULL;
1091 task->td_last_tied = task;
1092 task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1094 if (set_curr_task) {
1095 KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
1097 KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
1098 task->td_taskgroup = NULL;
1099 task->td_dephash = NULL;
1100 __kmp_push_current_task_to_thread(this_thr, team, tid);
1102 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
1103 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
1107 if (UNLIKELY(ompt_enabled.enabled))
1108 __ompt_task_init(task, tid);
1111 KF_TRACE(10, (
"__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
1120 void __kmp_finish_implicit_task(kmp_info_t *thread) {
1121 kmp_taskdata_t *task = thread->th.th_current_task;
1122 if (task->td_dephash) {
1124 task->td_flags.complete = 1;
1125 children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
1126 kmp_tasking_flags_t flags_old = task->td_flags;
1127 if (children == 0 && flags_old.complete == 1) {
1128 kmp_tasking_flags_t flags_new = flags_old;
1129 flags_new.complete = 0;
1130 if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
1131 *RCAST(kmp_int32 *, &flags_old),
1132 *RCAST(kmp_int32 *, &flags_new))) {
1133 KA_TRACE(100, (
"__kmp_finish_implicit_task: T#%d cleans " 1134 "dephash of implicit task %p\n",
1135 thread->th.th_info.ds.ds_gtid, task));
1136 __kmp_dephash_free_entries(thread, task->td_dephash);
1146 void __kmp_free_implicit_task(kmp_info_t *thread) {
1147 kmp_taskdata_t *task = thread->th.th_current_task;
1148 if (task && task->td_dephash) {
1149 __kmp_dephash_free(thread, task->td_dephash);
1150 task->td_dephash = NULL;
// __kmp_round_up_to_val: round a size up to a multiple of val. Used to insert
// padding between structures co-allocated using a single malloc() call.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
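// __kmp_task_alloc: allocate the taskdata and task structures for a task.
// A single allocation holds the kmp_taskdata_t, the compiler-sized kmp_task_t
// immediately after it, and then the shareds block at shareds_offset (rounded
// up to pointer alignment).
//
// loc_ref: source location information
// gtid: global thread number
// flags: tiedness, final, merged_if0, proxy, detachable, etc.
// sizeof_kmp_task_t / sizeof_shareds: sizes requested by the compiler
// task_entry: pointer to the outlined task entry routine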
1178 kmp_task_t *__kmp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1179 kmp_tasking_flags_t *flags,
1180 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
1181 kmp_routine_entry_t task_entry) {
1183 kmp_taskdata_t *taskdata;
1184 kmp_info_t *thread = __kmp_threads[gtid];
1185 kmp_team_t *team = thread->th.th_team;
1186 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1187 size_t shareds_offset;
1189 if (!TCR_4(__kmp_init_middle))
1190 __kmp_middle_initialize();
1192 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " 1193 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1194 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
1195 sizeof_shareds, task_entry));
1197 if (parent_task->td_flags.final) {
1198 if (flags->merged_if0) {
1202 if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
1206 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1212 if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) {
1213 if (flags->proxy == TASK_PROXY) {
1214 flags->tiedness = TASK_UNTIED;
1215 flags->merged_if0 = 1;
1219 if ((thread->th.th_task_team) == NULL) {
1222 KMP_DEBUG_ASSERT(team->t.t_serialized);
1224 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
1226 __kmp_task_team_setup(
1229 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1231 kmp_task_team_t *task_team = thread->th.th_task_team;
1234 if (!KMP_TASKING_ENABLED(task_team)) {
1237 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1238 __kmp_enable_tasking(task_team, thread);
1239 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1240 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1242 if (thread_data->td.td_deque == NULL) {
1243 __kmp_alloc_task_deque(thread, thread_data);
1247 if (task_team->tt.tt_found_proxy_tasks == FALSE)
1248 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
1253 shareds_offset =
sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1254 shareds_offset = __kmp_round_up_to_val(shareds_offset,
sizeof(
void *));
1257 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
1259 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
1264 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
1267 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
1270 ANNOTATE_HAPPENS_AFTER(taskdata);
1272 task = KMP_TASKDATA_TO_TASK(taskdata);
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  // taskdata and task must be suitably aligned
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure the shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
1291 task->routine = task_entry;
1294 taskdata->td_task_id = KMP_GEN_TASK_ID();
1295 taskdata->td_team = team;
1296 taskdata->td_alloc_thread = thread;
1297 taskdata->td_parent = parent_task;
1298 taskdata->td_level = parent_task->td_level + 1;
1299 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
1300 taskdata->td_ident = loc_ref;
1301 taskdata->td_taskwait_ident = NULL;
1302 taskdata->td_taskwait_counter = 0;
1303 taskdata->td_taskwait_thread = 0;
1304 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1306 if (flags->proxy == TASK_FULL)
1307 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1309 taskdata->td_flags.tiedness = flags->tiedness;
1310 taskdata->td_flags.final = flags->final;
1311 taskdata->td_flags.merged_if0 = flags->merged_if0;
1312 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1313 taskdata->td_flags.proxy = flags->proxy;
1314 taskdata->td_flags.detachable = flags->detachable;
1315 taskdata->td_task_team = thread->th.th_task_team;
1316 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1317 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1320 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1323 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1329 taskdata->td_flags.task_serial =
1330 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1331 taskdata->td_flags.tasking_ser || flags->merged_if0);
1333 taskdata->td_flags.started = 0;
1334 taskdata->td_flags.executing = 0;
1335 taskdata->td_flags.complete = 0;
1336 taskdata->td_flags.freed = 0;
1338 taskdata->td_flags.native = flags->native;
1340 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1342 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1343 taskdata->td_taskgroup =
1344 parent_task->td_taskgroup;
1345 taskdata->td_dephash = NULL;
1346 taskdata->td_depnode = NULL;
1347 if (flags->tiedness == TASK_UNTIED)
1348 taskdata->td_last_tied = NULL;
1350 taskdata->td_last_tied = taskdata;
1351 taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1353 if (UNLIKELY(ompt_enabled.enabled))
1354 __ompt_task_init(taskdata, gtid);
1358 if (flags->proxy == TASK_PROXY ||
1359 flags->detachable == TASK_DETACHABLE ||
1360 !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
1362 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
1363 if (parent_task->td_taskgroup)
1364 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
1367 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
1368 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
1372 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1373 gtid, taskdata, taskdata->td_parent));
1374 ANNOTATE_HAPPENS_BEFORE(task);
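// __kmpc_omp_task_alloc: compiler entry point that reinterprets the packed
// kmp_int32 flags word as kmp_tasking_flags_t, clears the native flag, and
// forwards to __kmp_task_alloc.
//
// A compiler lowering "#pragma omp task" roughly emits the following sequence
// (a sketch, not the literal codegen):
//   kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, flags,
//                                         sizeof_task, sizeof_shareds, entry);
//   // ...copy firstprivate data into t->shareds...
//   __kmpc_omp_task(loc, gtid, t);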
1379 kmp_task_t *__kmpc_omp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1380 kmp_int32 flags,
size_t sizeof_kmp_task_t,
1381 size_t sizeof_shareds,
1382 kmp_routine_entry_t task_entry) {
1384 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1385 __kmp_assert_valid_gtid(gtid);
1386 input_flags->native = FALSE;
1388 KA_TRACE(10, (
"__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) " 1389 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1390 gtid, loc_ref, input_flags->tiedness ?
"tied " :
"untied",
1391 input_flags->proxy ?
"proxy" :
"",
1392 input_flags->detachable ?
"detachable" :
"", sizeof_kmp_task_t,
1393 sizeof_shareds, task_entry));
1395 retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1396 sizeof_shareds, task_entry);
1398 KA_TRACE(20, (
"__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
1403 kmp_task_t *__kmpc_omp_target_task_alloc(
ident_t *loc_ref, kmp_int32 gtid,
1405 size_t sizeof_kmp_task_t,
1406 size_t sizeof_shareds,
1407 kmp_routine_entry_t task_entry,
1408 kmp_int64 device_id) {
1409 return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
1410 sizeof_shareds, task_entry);
    kmp_task_t *new_task, kmp_int32 naffins,
    kmp_task_affinity_info_t *affin_list) {
  return 0; // affinity information is currently ignored by the host runtime
}
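// __kmp_invoke_task: invoke the specified task
//
// gtid: global thread ID of caller
// task: the task to invoke
// current_task: the task to resume after task invocation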
1438 static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1439 kmp_taskdata_t *current_task) {
1440 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1444 30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1445 gtid, taskdata, current_task));
1446 KMP_DEBUG_ASSERT(task);
1447 if (taskdata->td_flags.proxy == TASK_PROXY &&
1448 taskdata->td_flags.complete == 1) {
1453 (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1456 __kmp_bottom_half_finish_proxy(gtid, task);
1458 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for " 1459 "proxy task %p, resuming task %p\n",
1460 gtid, taskdata, current_task));
1468 ompt_thread_info_t oldInfo;
1469 if (UNLIKELY(ompt_enabled.enabled)) {
1471 thread = __kmp_threads[gtid];
1472 oldInfo = thread->th.ompt_thread_info;
1473 thread->th.ompt_thread_info.wait_id = 0;
1474 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1475 ? ompt_state_work_serial
1476 : ompt_state_work_parallel;
1477 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1482 if (taskdata->td_flags.proxy != TASK_PROXY) {
1483 ANNOTATE_HAPPENS_AFTER(task);
1484 __kmp_task_start(gtid, task, current_task);
1490 if (__kmp_omp_cancellation) {
1491 thread = __kmp_threads[gtid];
1492 kmp_team_t *this_team = thread->th.th_team;
1493 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1494 if ((taskgroup && taskgroup->cancel_request) ||
1495 (this_team->t.t_cancel_request == cancel_parallel)) {
1496 #if OMPT_SUPPORT && OMPT_OPTIONAL 1497 ompt_data_t *task_data;
1498 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
1499 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
1500 ompt_callbacks.ompt_callback(ompt_callback_cancel)(
1502 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1503 : ompt_cancel_parallel) |
1504 ompt_cancel_discarded_task,
1517 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
1518 taskdata->td_last_tied = current_task->td_last_tied;
1519 KMP_DEBUG_ASSERT(taskdata->td_last_tied);
1521 #if KMP_STATS_ENABLED 1523 switch (KMP_GET_THREAD_STATE()) {
1524 case FORK_JOIN_BARRIER:
1525 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1528 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1531 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1534 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1537 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1540 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
1543 #endif // KMP_STATS_ENABLED 1547 if (UNLIKELY(ompt_enabled.enabled))
1548 __ompt_task_start(task, current_task, gtid);
1551 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1552 kmp_uint64 cur_time;
1553 kmp_int32 kmp_itt_count_task =
1554 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1555 current_task->td_flags.tasktype == TASK_IMPLICIT;
1556 if (kmp_itt_count_task) {
1557 thread = __kmp_threads[gtid];
1559 if (thread->th.th_bar_arrive_time)
1560 cur_time = __itt_get_timestamp();
1562 kmp_itt_count_task = 0;
1564 KMP_FSYNC_ACQUIRED(taskdata);
#ifdef KMP_GOMP_COMPAT
  if (taskdata->td_flags.native) {
    ((void (*)(void *))(*(task->routine)))(task->shareds);
  } else
#endif /* KMP_GOMP_COMPAT */
  {
    (*(task->routine))(gtid, task);
  }
  KMP_POP_PARTITIONED_TIMER();
1577 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1578 if (kmp_itt_count_task) {
1580 thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
1582 KMP_FSYNC_CANCEL(taskdata);
1583 KMP_FSYNC_RELEASING(taskdata->td_parent);
1589 if (taskdata->td_flags.proxy != TASK_PROXY) {
1590 ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
1592 if (UNLIKELY(ompt_enabled.enabled)) {
1593 thread->th.ompt_thread_info = oldInfo;
1594 if (taskdata->td_flags.tiedness == TASK_TIED) {
1595 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1597 __kmp_task_finish<true>(gtid, task, current_task);
1600 __kmp_task_finish<false>(gtid, task, current_task);
1605 (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1606 gtid, taskdata, current_task));
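// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution.
// If the task cannot be deferred (TASK_NOT_PUSHED) it is executed immediately
// with task_serial set. Returns TASK_CURRENT_NOT_QUEUED, since the current
// task is never suspended and queued here.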
1620 kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1621 kmp_task_t *new_task) {
1622 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1624 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1625 loc_ref, new_taskdata));
1628 kmp_taskdata_t *parent;
1629 if (UNLIKELY(ompt_enabled.enabled)) {
1630 parent = new_taskdata->td_parent;
1631 if (ompt_enabled.ompt_callback_task_create) {
1632 ompt_data_t task_data = ompt_data_none;
1633 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1634 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1635 parent ? &(parent->ompt_task_info.frame) : NULL,
1636 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1637 OMPT_GET_RETURN_ADDRESS(0));
1645 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1647 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1648 new_taskdata->td_flags.task_serial = 1;
1649 __kmp_invoke_task(gtid, new_task, current_task);
1654 (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " 1655 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1656 gtid, loc_ref, new_taskdata));
1658 ANNOTATE_HAPPENS_BEFORE(new_task);
1660 if (UNLIKELY(ompt_enabled.enabled)) {
1661 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1664 return TASK_CURRENT_NOT_QUEUED;
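// __kmp_omp_task: Schedule a non-thread-switchable task for execution.
//
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by the task allocator
// serialize_immediate: if TRUE and the task is executed immediately, its
// task_serial flag is set so descendant tasks are serialized as well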
1678 kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1679 bool serialize_immediate) {
1680 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1684 if (new_taskdata->td_flags.proxy == TASK_PROXY ||
1685 __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1687 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1688 if (serialize_immediate)
1689 new_taskdata->td_flags.task_serial = 1;
1690 __kmp_invoke_task(gtid, new_task, current_task);
1693 ANNOTATE_HAPPENS_BEFORE(new_task);
1694 return TASK_CURRENT_NOT_QUEUED;
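// __kmpc_omp_task: Wrapper around __kmp_omp_task that schedules a
// non-thread-switchable task from the encountering thread. Fires the OMPT
// task-create callback when the task has not yet started.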
1709 kmp_int32 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid,
1710 kmp_task_t *new_task) {
1712 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1714 #if KMP_DEBUG || OMPT_SUPPORT 1715 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1717 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1719 __kmp_assert_valid_gtid(gtid);
1722 kmp_taskdata_t *parent = NULL;
1723 if (UNLIKELY(ompt_enabled.enabled)) {
1724 if (!new_taskdata->td_flags.started) {
1725 OMPT_STORE_RETURN_ADDRESS(gtid);
1726 parent = new_taskdata->td_parent;
1727 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1728 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1730 if (ompt_enabled.ompt_callback_task_create) {
1731 ompt_data_t task_data = ompt_data_none;
1732 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1733 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1734 parent ? &(parent->ompt_task_info.frame) : NULL,
1735 &(new_taskdata->ompt_task_info.task_data),
1736 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1737 OMPT_LOAD_RETURN_ADDRESS(gtid));
1742 __ompt_task_finish(new_task,
1743 new_taskdata->ompt_task_info.scheduling_parent,
1745 new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
1750 res = __kmp_omp_task(gtid, new_task,
true);
1752 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning " 1753 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1754 gtid, loc_ref, new_taskdata));
1756 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1757 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
1776 kmp_int32 __kmp_omp_taskloop_task(
ident_t *loc_ref, kmp_int32 gtid,
1777 kmp_task_t *new_task,
void *codeptr_ra) {
1779 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1781 #if KMP_DEBUG || OMPT_SUPPORT 1782 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1784 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1788 kmp_taskdata_t *parent = NULL;
1789 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
1790 parent = new_taskdata->td_parent;
1791 if (!parent->ompt_task_info.frame.enter_frame.ptr)
1792 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1793 if (ompt_enabled.ompt_callback_task_create) {
1794 ompt_data_t task_data = ompt_data_none;
1795 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1796 parent ? &(parent->ompt_task_info.task_data) : &task_data,
1797 parent ? &(parent->ompt_task_info.frame) : NULL,
1798 &(new_taskdata->ompt_task_info.task_data),
1799 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1805 res = __kmp_omp_task(gtid, new_task,
true);
1807 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning " 1808 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1809 gtid, loc_ref, new_taskdata));
1811 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
1812 parent->ompt_task_info.frame.enter_frame = ompt_data_none;
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1827 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
1828 __kmp_assert_valid_gtid(gtid);
1830 if (__kmp_tasking_mode != tskm_immediate_exec) {
1831 thread = __kmp_threads[gtid];
1832 taskdata = thread->th.th_current_task;
1834 #if OMPT_SUPPORT && OMPT_OPTIONAL 1835 ompt_data_t *my_task_data;
1836 ompt_data_t *my_parallel_data;
1839 my_task_data = &(taskdata->ompt_task_info.task_data);
1840 my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
1842 taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
1844 if (ompt_enabled.ompt_callback_sync_region) {
1845 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1846 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1847 my_task_data, return_address);
1850 if (ompt_enabled.ompt_callback_sync_region_wait) {
1851 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1852 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
1853 my_task_data, return_address);
1856 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1863 taskdata->td_taskwait_counter += 1;
1864 taskdata->td_taskwait_ident = loc_ref;
1865 taskdata->td_taskwait_thread = gtid + 1;
1868 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1869 if (itt_sync_obj != NULL)
1870 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1874 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
1876 must_wait = must_wait || (thread->th.th_task_team != NULL &&
1877 thread->th.th_task_team->tt.tt_found_proxy_tasks);
1879 kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
1880 &(taskdata->td_incomplete_child_tasks)),
1882 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
1883 flag.execute_tasks(thread, gtid, FALSE,
1884 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1885 __kmp_task_stealing_constraint);
1889 if (itt_sync_obj != NULL)
1890 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1891 KMP_FSYNC_ACQUIRED(taskdata);
1896 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1898 #if OMPT_SUPPORT && OMPT_OPTIONAL 1900 if (ompt_enabled.ompt_callback_sync_region_wait) {
1901 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1902 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1903 my_task_data, return_address);
1905 if (ompt_enabled.ompt_callback_sync_region) {
1906 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1907 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
1908 my_task_data, return_address);
1910 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
1912 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1914 ANNOTATE_HAPPENS_AFTER(taskdata);
1917 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " 1918 "returning TASK_CURRENT_NOT_QUEUED\n",
1921 return TASK_CURRENT_NOT_QUEUED;
1924 #if OMPT_SUPPORT && OMPT_OPTIONAL 1926 static kmp_int32 __kmpc_omp_taskwait_ompt(
ident_t *loc_ref, kmp_int32 gtid,
1927 void *frame_address,
1928 void *return_address) {
1929 return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
1932 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1936 kmp_int32 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid) {
1937 #if OMPT_SUPPORT && OMPT_OPTIONAL 1938 if (UNLIKELY(ompt_enabled.enabled)) {
1939 OMPT_STORE_RETURN_ADDRESS(gtid);
1940 return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
1941 OMPT_LOAD_RETURN_ADDRESS(gtid));
1944 return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
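// __kmpc_omp_taskyield: switch to a different task; the calling thread may
// execute other available tasks from its task team before resuming.
//
// loc_ref: location of the original taskyield directive
// gtid: Global Thread ID of encountering thread
// end_part: only reported in trace output here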
1948 kmp_int32 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part) {
1949 kmp_taskdata_t *taskdata;
1951 int thread_finished = FALSE;
1954 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
1956 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1957 gtid, loc_ref, end_part));
1958 __kmp_assert_valid_gtid(gtid);
1960 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
1961 thread = __kmp_threads[gtid];
1962 taskdata = thread->th.th_current_task;
1969 taskdata->td_taskwait_counter += 1;
1970 taskdata->td_taskwait_ident = loc_ref;
1971 taskdata->td_taskwait_thread = gtid + 1;
1974 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1975 if (itt_sync_obj != NULL)
1976 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1978 if (!taskdata->td_flags.team_serial) {
1979 kmp_task_team_t *task_team = thread->th.th_task_team;
1980 if (task_team != NULL) {
1981 if (KMP_TASKING_ENABLED(task_team)) {
1983 if (UNLIKELY(ompt_enabled.enabled))
1984 thread->th.ompt_thread_info.ompt_task_yielded = 1;
1986 __kmp_execute_tasks_32(
1987 thread, gtid, NULL, FALSE,
1988 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1989 __kmp_task_stealing_constraint);
1991 if (UNLIKELY(ompt_enabled.enabled))
1992 thread->th.ompt_thread_info.ompt_task_yielded = 0;
1998 if (itt_sync_obj != NULL)
1999 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
2004 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2007 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, " 2008 "returning TASK_CURRENT_NOT_QUEUED\n",
2011 return TASK_CURRENT_NOT_QUEUED;
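// ------------------------------------------------------------------------
// Taskgroup task reductions (task_reduction / in_reduction support).
// Each taskgroup keeps an array of per-item descriptors in reduce_data, with
// per-thread private copies that are combined when the taskgroup finishes.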
2032 unsigned reserved31 : 31;
2087 item.reduce_orig = NULL;
2092 if (src.reduce_orig != NULL) {
2093 item.reduce_orig = src.reduce_orig;
2095 item.reduce_orig = src.reduce_shar;
  ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
  ((void (*)(void *, void *))item.reduce_init)(
      (char *)(item.reduce_priv) + offset, item.reduce_orig);
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
2114 __kmp_assert_valid_gtid(gtid);
2115 kmp_info_t *thread = __kmp_threads[gtid];
2116 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2117 kmp_int32 nth = thread->th.th_team_nproc;
2121 KMP_ASSERT(tg != NULL);
2122 KMP_ASSERT(data != NULL);
2123 KMP_ASSERT(num > 0);
2125 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
2129 KA_TRACE(10, (
"__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
2133 for (
int i = 0; i < num; ++i) {
2134 size_t size = data[i].reduce_size - 1;
2136 size += CACHE_LINE - size % CACHE_LINE;
2137 KMP_ASSERT(data[i].reduce_comb != NULL);
2140 arr[i].
flags = data[i].flags;
2144 __kmp_assign_orig<T>(arr[i], data[i]);
2145 if (!arr[i].flags.lazy_priv) {
2147 arr[i].reduce_priv = __kmp_allocate(nth * size);
2148 arr[i].reduce_pend = (
char *)(arr[i].reduce_priv) + nth * size;
2149 if (arr[i].reduce_init != NULL) {
2151 for (
int j = 0; j < nth; ++j) {
2152 __kmp_call_init<T>(arr[i], j * size);
2159 arr[i].reduce_priv = __kmp_allocate(nth *
sizeof(
void *));
2162 tg->reduce_data = (
void *)arr;
2163 tg->reduce_num_data = num;
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
2206 KA_TRACE(20, (
"__kmp_task_reduction_init_copy: Th %p, init taskgroup %p," 2208 thr, tg, reduce_data));
2213 for (
int i = 0; i < num; ++i) {
2216 tg->reduce_data = (
void *)arr;
2217 tg->reduce_num_data = num;
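// What follows appears to be the body of the per-thread reduction-data lookup
// (__kmpc_task_reduction_get_th_data): it walks the taskgroup chain, matches
// the queried address against either the shared item or its private block,
// and lazily allocates/initializes the private copy when lazy_priv is set.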
2230 __kmp_assert_valid_gtid(gtid);
2231 kmp_info_t *thread = __kmp_threads[gtid];
2232 kmp_int32 nth = thread->th.th_team_nproc;
2236 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2238 tg = thread->th.th_current_task->td_taskgroup;
2239 KMP_ASSERT(tg != NULL);
2241 kmp_int32 num = tg->reduce_num_data;
2242 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2244 KMP_ASSERT(data != NULL);
2245 while (tg != NULL) {
2246 for (
int i = 0; i < num; ++i) {
2247 if (!arr[i].flags.lazy_priv) {
2248 if (data == arr[i].reduce_shar ||
2249 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
2250 return (
char *)(arr[i].
reduce_priv) + tid * arr[i].reduce_size;
2253 void **p_priv = (
void **)(arr[i].reduce_priv);
2254 if (data == arr[i].reduce_shar)
2257 for (
int j = 0; j < nth; ++j)
2258 if (data == p_priv[j])
2262 if (p_priv[tid] == NULL) {
2264 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
2265 if (arr[i].reduce_init != NULL) {
2266 if (arr[i].reduce_orig != NULL) {
2268 p_priv[tid], arr[i].reduce_orig);
2270 ((void (*)(
void *))arr[i].
reduce_init)(p_priv[tid]);
2279 num = tg->reduce_num_data;
2281 KMP_ASSERT2(0,
"Unknown task reduction item");
2287 static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
2288 kmp_int32 nth = th->th.th_team_nproc;
2289 KMP_DEBUG_ASSERT(nth > 1);
2291 kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
2300 for (
int j = 0; j < nth; ++j) {
2301 void *priv_data = (
char *)pr_data + j * size;
2302 f_comb(sh_data, priv_data);
2307 void **pr_data = (
void **)(arr[i].reduce_priv);
2308 for (
int j = 0; j < nth; ++j) {
2309 if (pr_data[j] != NULL) {
2310 f_comb(sh_data, pr_data[j]);
2313 __kmp_free(pr_data[j]);
2317 __kmp_free(arr[i].reduce_priv);
2319 __kmp_thread_free(th, arr);
2320 tg->reduce_data = NULL;
2321 tg->reduce_num_data = 0;
2327 static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
2328 __kmp_thread_free(th, tg->reduce_data);
2329 tg->reduce_data = NULL;
2330 tg->reduce_num_data = 0;
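// __kmp_task_reduction_modifier_init: initialize task reduction for a
// parallel or worksharing construct carrying a task_reduction modifier.
// Starts an implicit taskgroup; the first thread to arrive builds the
// reduction descriptor array and publishes it in the team, the others copy
// their taskgroup setup from it.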
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
2336 __kmp_assert_valid_gtid(gtid);
2337 kmp_info_t *thr = __kmp_threads[gtid];
2338 kmp_int32 nth = thr->th.th_team_nproc;
2339 __kmpc_taskgroup(loc, gtid);
2342 (
"__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
2343 gtid, thr->th.th_current_task->td_taskgroup));
2344 return (
void *)thr->th.th_current_task->td_taskgroup;
2346 kmp_team_t *team = thr->th.th_team;
2348 kmp_taskgroup_t *tg;
2349 reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
2350 if (reduce_data == NULL &&
2351 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2354 KMP_DEBUG_ASSERT(reduce_data == NULL);
2356 tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
2360 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
2361 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
2362 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
2365 (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
2369 KMP_DEBUG_ASSERT(reduce_data > (
void *)1);
2370 tg = thr->th.th_current_task->td_taskgroup;
2371 __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
2393 int num,
void *data) {
2394 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2414 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2427 __kmpc_end_taskgroup(loc, gtid);
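// __kmpc_taskgroup: Begin a new taskgroup for the current task: allocate a
// kmp_taskgroup_t, link it to the enclosing taskgroup, and fire the OMPT
// taskgroup sync-region begin callback if enabled.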
2431 void __kmpc_taskgroup(
ident_t *loc,
int gtid) {
2432 __kmp_assert_valid_gtid(gtid);
2433 kmp_info_t *thread = __kmp_threads[gtid];
2434 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2435 kmp_taskgroup_t *tg_new =
2436 (kmp_taskgroup_t *)__kmp_thread_malloc(thread,
sizeof(kmp_taskgroup_t));
2437 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
2438 KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
2439 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
2440 tg_new->parent = taskdata->td_taskgroup;
2441 tg_new->reduce_data = NULL;
2442 tg_new->reduce_num_data = 0;
2443 taskdata->td_taskgroup = tg_new;
2445 #if OMPT_SUPPORT && OMPT_OPTIONAL 2446 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2447 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2449 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2450 kmp_team_t *team = thread->th.th_team;
2451 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2453 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2455 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2456 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2457 &(my_task_data), codeptr);
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2488 KMP_DEBUG_ASSERT(taskgroup != NULL);
2489 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2491 if (__kmp_tasking_mode != tskm_immediate_exec) {
2493 taskdata->td_taskwait_counter += 1;
2494 taskdata->td_taskwait_ident = loc;
2495 taskdata->td_taskwait_thread = gtid + 1;
2499 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
2500 if (itt_sync_obj != NULL)
2501 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
2504 #if OMPT_SUPPORT && OMPT_OPTIONAL 2505 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2506 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2507 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2508 &(my_task_data), codeptr);
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks)) {
      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
                       0U);
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
2525 #if OMPT_SUPPORT && OMPT_OPTIONAL 2526 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2527 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2528 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2529 &(my_task_data), codeptr);
2534 if (itt_sync_obj != NULL)
2535 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
2536 KMP_FSYNC_ACQUIRED(taskdata);
2539 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2541 if (taskgroup->reduce_data != NULL) {
2544 kmp_team_t *t = thread->th.th_team;
2548 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
2551 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
2552 if (cnt == thread->th.th_team_nproc - 1) {
2555 __kmp_task_reduction_fini(thread, taskgroup);
2558 __kmp_thread_free(thread, reduce_data);
2559 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
2560 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
2564 __kmp_task_reduction_clean(thread, taskgroup);
2566 }
else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
2570 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
2571 if (cnt == thread->th.th_team_nproc - 1) {
2573 __kmp_task_reduction_fini(thread, taskgroup);
2576 __kmp_thread_free(thread, reduce_data);
2577 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
2578 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
2582 __kmp_task_reduction_clean(thread, taskgroup);
2586 __kmp_task_reduction_fini(thread, taskgroup);
2590 taskdata->td_taskgroup = taskgroup->parent;
2591 __kmp_thread_free(thread, taskgroup);
2593 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
2595 ANNOTATE_HAPPENS_AFTER(taskdata);
2597 #if OMPT_SUPPORT && OMPT_OPTIONAL 2598 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2599 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2600 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2601 &(my_task_data), codeptr);
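// Illustrative sketch (added; not from this file): a compiler typically
// brackets the structured block of "#pragma omp taskgroup" with the two entry
// points above. The exact generated sequence varies by compiler; treat this as
// an assumption-level outline rather than the definitive lowering.
//
//   void lowered_user_region(ident_t *loc, int gtid) {
//     __kmpc_taskgroup(loc, gtid);      // push a fresh kmp_taskgroup_t
//     /* ... body spawns tasks; each child bumps taskgroup->count and
//        decrements it again when it completes ... */
//     __kmpc_end_taskgroup(loc, gtid);  // wait for count == 0, pop and free
//   }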
2607 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2608 kmp_task_team_t *task_team,
2609 kmp_int32 is_constrained) {
2611 kmp_taskdata_t *taskdata;
2612 kmp_thread_data_t *thread_data;
2615 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2616 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
2619 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2621 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
2622 gtid, thread_data->td.td_deque_ntasks,
2623 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2625 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2627 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " 2628 "ntasks=%d head=%u tail=%u\n",
2629 gtid, thread_data->td.td_deque_ntasks,
2630 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2634 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2636 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
2637 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2639 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 2640 "ntasks=%d head=%u tail=%u\n",
2641 gtid, thread_data->td.td_deque_ntasks,
2642 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2646 tail = (thread_data->td.td_deque_tail - 1) &
2647 TASK_DEQUE_MASK(thread_data->td);
2648 taskdata = thread_data->td.td_deque[tail];
2650 if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
2651 thread->th.th_current_task)) {
2653 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2655 (
"__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: " 2656 "ntasks=%d head=%u tail=%u\n",
2657 gtid, thread_data->td.td_deque_ntasks,
2658 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2662 thread_data->td.td_deque_tail = tail;
2663 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
2665 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2667 KA_TRACE(10, (
"__kmp_remove_my_task(exit #4): T#%d task %p removed: " 2668 "ntasks=%d head=%u tail=%u\n",
2669 gtid, taskdata, thread_data->td.td_deque_ntasks,
2670 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
2672 task = KMP_TASKDATA_TO_TASK(taskdata);
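// Illustrative sketch (added): the indices above are plain unsigned counters
// wrapped with TASK_DEQUE_MASK (deque size minus one, size being a power of
// two), so the owning thread pops from the tail with one masked decrement.
// Hypothetical standalone form of the same arithmetic; names are illustrative:
//
//   typedef struct { void *buf[8]; unsigned head, tail, ntasks; } tiny_deque_t;
//   #define TINY_MASK(d) ((unsigned)(sizeof((d)->buf) / sizeof((d)->buf[0]) - 1))
//
//   static void *tiny_pop_tail(tiny_deque_t *d) {
//     if (d->ntasks == 0)
//       return NULL;                             // nothing to pop
//     unsigned t = (d->tail - 1) & TINY_MASK(d); // wrap backwards
//     void *task = d->buf[t];
//     d->tail = t;
//     d->ntasks--;
//     return task;
//   }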
2679 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
2680 kmp_task_team_t *task_team,
2681 std::atomic<kmp_int32> *unfinished_threads,
2682 int *thread_finished,
2683 kmp_int32 is_constrained) {
2685 kmp_taskdata_t *taskdata;
2686 kmp_taskdata_t *current;
2687 kmp_thread_data_t *victim_td, *threads_data;
2689 kmp_int32 victim_tid;
2691 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2693 threads_data = task_team->tt.tt_threads_data;
2694 KMP_DEBUG_ASSERT(threads_data != NULL);
2696 victim_tid = victim_thr->th.th_info.ds.ds_tid;
2697 victim_td = &threads_data[victim_tid];
2699 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: " 2700 "task_team=%p ntasks=%d head=%u tail=%u\n",
2701 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2702 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2703 victim_td->td.td_deque_tail));
2705 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
2706 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " 2707 "task_team=%p ntasks=%d head=%u tail=%u\n",
2708 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2709 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2710 victim_td->td.td_deque_tail));
2714 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2716 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
2719 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2720 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " 2721 "task_team=%p ntasks=%d head=%u tail=%u\n",
2722 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2723 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2727 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2728 current = __kmp_threads[gtid]->th.th_current_task;
2729 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2730 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2732 victim_td->td.td_deque_head =
2733 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2735 if (!task_team->tt.tt_untied_task_encountered) {
2737 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2738 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d could not steal from " 2739 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2740 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2741 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2746 target = victim_td->td.td_deque_head;
2748 for (i = 1; i < ntasks; ++i) {
2749 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2750 taskdata = victim_td->td.td_deque[target];
2751 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2757 if (taskdata == NULL) {
2759 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2760 KA_TRACE(10, (
"__kmp_steal_task(exit #4): T#%d could not steal from " 2761 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2762 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2763 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2767 for (i = i + 1; i < ntasks; ++i) {
2769 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
2770 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
2774 victim_td->td.td_deque_tail ==
2775 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
2776 victim_td->td.td_deque_tail = target;
2778 if (*thread_finished) {
2784 count = KMP_ATOMIC_INC(unfinished_threads);
2788 (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2789 gtid, count + 1, task_team));
2791 *thread_finished = FALSE;
2793 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
2795 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2799 (
"__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: " 2800 "task_team=%p ntasks=%d head=%u tail=%u\n",
2801 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
2802 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2804 task = KMP_TASKDATA_TO_TASK(taskdata);
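// Illustrative counterpart (added) to the pop-from-tail sketch after
// __kmp_remove_my_task: a thief removes from the head, so owner and thief work
// on opposite ends of the deque and only meet on the bootstrap lock. Same
// hypothetical tiny_deque_t as in that sketch:
//
//   static void *tiny_steal_head(tiny_deque_t *d) {
//     if (d->ntasks == 0)
//       return NULL;
//     void *task = d->buf[d->head];
//     d->head = (d->head + 1) & TINY_MASK(d); // wrap forwards
//     d->ntasks--;
//     return task;
//   }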
2818 static inline int __kmp_execute_tasks_template(
2819 kmp_info_t *thread, kmp_int32 gtid, C *flag,
int final_spin,
2820 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2821 kmp_int32 is_constrained) {
2822 kmp_task_team_t *task_team = thread->th.th_task_team;
2823 kmp_thread_data_t *threads_data;
2825 kmp_info_t *other_thread;
2826 kmp_taskdata_t *current_task = thread->th.th_current_task;
2827 std::atomic<kmp_int32> *unfinished_threads;
2828 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
2829 tid = thread->th.th_info.ds.ds_tid;
2831 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2832 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
2834 if (task_team == NULL || current_task == NULL)
2837 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d " 2838 "*thread_finished=%d\n",
2839 gtid, final_spin, *thread_finished));
2841 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2842 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2843 KMP_DEBUG_ASSERT(threads_data != NULL);
2845 nthreads = task_team->tt.tt_nproc;
2846 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2847 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
2848 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
2854 if (use_own_tasks) {
2855 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2857 if ((task == NULL) && (nthreads > 1)) {
2861 if (victim_tid == -2) {
2862 victim_tid = threads_data[tid].td.td_deque_last_stolen;
2865 other_thread = threads_data[victim_tid].td.td_thr;
2867 if (victim_tid != -1) {
2869 }
else if (!new_victim) {
2875 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
2876 if (victim_tid >= tid) {
2880 other_thread = threads_data[victim_tid].td.td_thr;
2890 if ((__kmp_tasking_mode == tskm_task_teams) &&
2891 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2892 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
2895 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
2896 other_thread->th.th_sleep_loc);
2909 task = __kmp_steal_task(other_thread, gtid, task_team,
2910 unfinished_threads, thread_finished,
2914 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
2915 threads_data[tid].td.td_deque_last_stolen = victim_tid;
2922 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // could not get the object reliably earlier
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
2950 if (flag == NULL || (!final_spin && flag->done_check())) {
2953 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
2957 if (thread->th.th_task_team == NULL) {
2960 KMP_YIELD(__kmp_library == library_throughput);
2963 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
2964 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned " 2965 "other tasks, restart\n",
2976 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
2980 if (!*thread_finished) {
2983 count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
2984 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec " 2985 "unfinished_threads to %d task_team=%p\n",
2986 gtid, count, task_team));
2987 *thread_finished = TRUE;
2995 if (flag != NULL && flag->done_check()) {
2998 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3006 if (thread->th.th_task_team == NULL) {
3008 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3018 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
3024 int __kmp_execute_tasks_32(
3025 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag,
int final_spin,
3026 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3027 kmp_int32 is_constrained) {
3028 return __kmp_execute_tasks_template(
3029 thread, gtid, flag, final_spin,
3030 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3033 int __kmp_execute_tasks_64(
3034 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag,
int final_spin,
3035 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3036 kmp_int32 is_constrained) {
3037 return __kmp_execute_tasks_template(
3038 thread, gtid, flag, final_spin,
3039 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3042 int __kmp_execute_tasks_oncore(
3043 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag,
int final_spin,
3044 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
3045 kmp_int32 is_constrained) {
3046 return __kmp_execute_tasks_template(
3047 thread, gtid, flag, final_spin,
3048 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3054 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3055 kmp_info_t *this_thr) {
3056 kmp_thread_data_t *threads_data;
3057 int nthreads, i, is_init_thread;
3059 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3060 __kmp_gtid_from_thread(this_thr)));
3062 KMP_DEBUG_ASSERT(task_team != NULL);
3063 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3065 nthreads = task_team->tt.tt_nproc;
3066 KMP_DEBUG_ASSERT(nthreads > 0);
3067 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3070 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3072 if (!is_init_thread) {
3076 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3077 __kmp_gtid_from_thread(this_thr)));
3080 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3081 KMP_DEBUG_ASSERT(threads_data != NULL);
3083 if (__kmp_tasking_mode == tskm_task_teams &&
3084 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3088 for (i = 0; i < nthreads; i++) {
3089 volatile void *sleep_loc;
3090 kmp_info_t *thread = threads_data[i].td.td_thr;
3092 if (i == this_thr->th.th_info.ds.ds_tid) {
3101 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3103 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3104 __kmp_gtid_from_thread(this_thr),
3105 __kmp_gtid_from_thread(thread)));
3106 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
3108 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3109 __kmp_gtid_from_thread(this_thr),
3110 __kmp_gtid_from_thread(thread)));
3115 KA_TRACE(10, (
"__kmp_enable_tasking(exit): T#%d\n",
3116 __kmp_gtid_from_thread(this_thr)));
// Global free list of task teams and the bootstrap lock that guards it.
static kmp_task_team_t *__kmp_free_task_teams = NULL;

kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
3160 static void __kmp_alloc_task_deque(kmp_info_t *thread,
3161 kmp_thread_data_t *thread_data) {
3162 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3163 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3166 thread_data->td.td_deque_last_stolen = -1;
3168 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3169 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3170 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3174 (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3175 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
3179 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
3180 INITIAL_TASK_DEQUE_SIZE *
sizeof(kmp_taskdata_t *));
3181 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
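// Note (added): TASK_DEQUE_MASK relies on the deque size staying a power of
// two, because (x & (size - 1)) only equals (x % size) in that case. Minimal
// worked check:
//   size == 8:  13 & 7 == 5  and  13 % 8 == 5   (equal)
//   size == 6:  13 & 5 == 5  but  13 % 6 == 1   (not equal)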
3187 static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3188 if (thread_data->td.td_deque != NULL) {
3189 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3190 TCW_4(thread_data->td.td_deque_ntasks, 0);
3191 __kmp_free(thread_data->td.td_deque);
3192 thread_data->td.td_deque = NULL;
3193 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}

static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3212 kmp_task_team_t *task_team) {
3213 kmp_thread_data_t **threads_data_p;
3214 kmp_int32 nthreads, maxthreads;
3215 int is_init_thread = FALSE;
3217 if (TCR_4(task_team->tt.tt_found_tasks)) {
3222 threads_data_p = &task_team->tt.tt_threads_data;
3223 nthreads = task_team->tt.tt_nproc;
3224 maxthreads = task_team->tt.tt_max_threads;
3229 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3231 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3233 kmp_team_t *team = thread->th.th_team;
3236 is_init_thread = TRUE;
3237 if (maxthreads < nthreads) {
3239 if (*threads_data_p != NULL) {
3240 kmp_thread_data_t *old_data = *threads_data_p;
3241 kmp_thread_data_t *new_data = NULL;
3245 (
"__kmp_realloc_task_threads_data: T#%d reallocating " 3246 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3247 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3252 new_data = (kmp_thread_data_t *)__kmp_allocate(
3253 nthreads *
sizeof(kmp_thread_data_t));
3255 KMP_MEMCPY_S((
void *)new_data, nthreads *
sizeof(kmp_thread_data_t),
3256 (
void *)old_data, maxthreads *
sizeof(kmp_thread_data_t));
#ifdef BUILD_TIED_TASK_STACK
      // Initialize the tied-task stacks for the newly added slots.
      for (i = maxthreads; i < nthreads; i++) {
        kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
        __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
      }
#endif // BUILD_TIED_TASK_STACK
      // Install the new data and free the old data.
      (*threads_data_p) = new_data;
3267 __kmp_free(old_data);
3269 KE_TRACE(10, (
"__kmp_realloc_task_threads_data: T#%d allocating " 3270 "threads data for task_team %p, size = %d\n",
3271 __kmp_gtid_from_thread(thread), task_team, nthreads));
3275 ANNOTATE_IGNORE_WRITES_BEGIN();
3276 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3277 nthreads *
sizeof(kmp_thread_data_t));
3278 ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
      // Initialize the tied-task stacks for all freshly allocated slots.
      for (i = 0; i < nthreads; i++) {
        kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
        __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
      }
#endif // BUILD_TIED_TASK_STACK
      task_team->tt.tt_max_threads = nthreads;
3290 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3294 for (i = 0; i < nthreads; i++) {
3295 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3296 thread_data->td.td_thr = team->t.t_threads[i];
3298 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3302 thread_data->td.td_deque_last_stolen = -1;
3307 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3310 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3311 return is_init_thread;
3317 static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3318 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3319 if (task_team->tt.tt_threads_data != NULL) {
3321 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3322 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3324 __kmp_free(task_team->tt.tt_threads_data);
3325 task_team->tt.tt_threads_data = NULL;
3327 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3334 static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3336 kmp_task_team_t *task_team = NULL;
3339 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
3340 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3342 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3344 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3345 if (__kmp_free_task_teams != NULL) {
3346 task_team = __kmp_free_task_teams;
3347 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3348 task_team->tt.tt_next = NULL;
3350 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3353 if (task_team == NULL) {
3354 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating " 3355 "task team for team %p\n",
3356 __kmp_gtid_from_thread(thread), team));
3359 task_team = (kmp_task_team_t *)__kmp_allocate(
sizeof(kmp_task_team_t));
3360 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // Suppress race-condition detection on these synchronization flags in
    // debug mode to avoid false positives from tools.
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
3378 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3379 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3380 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3382 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3383 TCW_4(task_team->tt.tt_active, TRUE);
3385 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p " 3386 "unfinished_threads init'd to %d\n",
3387 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3388 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3395 void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3396 KA_TRACE(20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
3397 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3400 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3402 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3403 task_team->tt.tt_next = __kmp_free_task_teams;
3404 TCW_PTR(__kmp_free_task_teams, task_team);
3406 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
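// Illustrative sketch (added) of the recycling pattern used by
// __kmp_allocate_task_team / __kmp_free_task_team above: retired task teams
// are pushed onto a global singly linked free list under __kmp_task_team_lock
// and popped again on the next allocation. Hypothetical standalone version
// with illustrative names:
//
//   typedef struct node { struct node *next; } node_t;
//   static node_t *free_list; // guarded by an external lock in this sketch
//
//   static node_t *pool_get(void) {   // caller holds the lock
//     node_t *n = free_list;
//     if (n != NULL) {
//       free_list = n->next;
//       n->next = NULL;
//     }
//     return n;                       // NULL => caller falls back to malloc
//   }
//
//   static void pool_put(node_t *n) { // caller holds the lock
//     n->next = free_list;
//     free_list = n;
//   }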
3414 void __kmp_reap_task_teams(
void) {
3415 kmp_task_team_t *task_team;
3417 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3419 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3420 while ((task_team = __kmp_free_task_teams) != NULL) {
3421 __kmp_free_task_teams = task_team->tt.tt_next;
3422 task_team->tt.tt_next = NULL;
3425 if (task_team->tt.tt_threads_data != NULL) {
3426 __kmp_free_task_threads_data(task_team);
3428 __kmp_free(task_team);
3430 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3437 void __kmp_wait_to_unref_task_teams(
void) {
3442 KMP_INIT_YIELD(spins);
3450 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3451 thread = thread->th.th_next_pool) {
3455 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3456 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3457 __kmp_gtid_from_thread(thread)));
3462 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3463 thread->th.th_task_team = NULL;
3470 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to " 3471 "unreference task_team\n",
3472 __kmp_gtid_from_thread(thread)));
3474 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3475 volatile void *sleep_loc;
3477 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3481 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3482 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3483 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
3492 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
3498 void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
int always) {
3499 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3505 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3506 (always || team->t.t_nproc > 1)) {
3507 team->t.t_task_team[this_thr->th.th_task_state] =
3508 __kmp_allocate_task_team(this_thr, team);
3509 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d created new task_team %p " 3510 "for team %d at parity=%d\n",
3511 __kmp_gtid_from_thread(this_thr),
3512 team->t.t_task_team[this_thr->th.th_task_state],
3513 ((team != NULL) ? team->t.t_id : -1),
3514 this_thr->th.th_task_state));
3524 if (team->t.t_nproc > 1) {
3525 int other_team = 1 - this_thr->th.th_task_state;
3526 if (team->t.t_task_team[other_team] == NULL) {
3527 team->t.t_task_team[other_team] =
3528 __kmp_allocate_task_team(this_thr, team);
3529 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d created second new " 3530 "task_team %p for team %d at parity=%d\n",
3531 __kmp_gtid_from_thread(this_thr),
3532 team->t.t_task_team[other_team],
3533 ((team != NULL) ? team->t.t_id : -1), other_team));
3536 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3537 if (!task_team->tt.tt_active ||
3538 team->t.t_nproc != task_team->tt.tt_nproc) {
3539 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3540 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3541 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3542 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3544 TCW_4(task_team->tt.tt_active, TRUE);
3548 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d reset next task_team " 3549 "%p for team %d at parity=%d\n",
3550 __kmp_gtid_from_thread(this_thr),
3551 team->t.t_task_team[other_team],
3552 ((team != NULL) ? team->t.t_id : -1), other_team));
3560 void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
3561 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3565 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
3568 TCW_PTR(this_thr->th.th_task_team,
3569 team->t.t_task_team[this_thr->th.th_task_state]);
3571 (
"__kmp_task_team_sync: Thread T#%d task team switched to task_team " 3572 "%p from Team #%d (parity=%d)\n",
3573 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
3574 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
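// Note (added) on the parity flip above: each team keeps two task_team slots
// and every barrier toggles th_task_state between 0 and 1, so workers drain
// one slot while the master prepares the other. Minimal illustration:
//   state: 0 -> 1 -> 0 -> 1 ...          (s = 1 - s flips between the slots)
//   next = team->t.t_task_team[1 - this_thr->th.th_task_state];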
3584 void __kmp_task_team_wait(
3585 kmp_info_t *this_thr,
3586 kmp_team_t *team USE_ITT_BUILD_ARG(
void *itt_sync_obj),
int wait) {
3587 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
3589 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3590 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
3592 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
3594 KA_TRACE(20, (
"__kmp_task_team_wait: Master T#%d waiting for all tasks " 3595 "(for unfinished_threads to reach 0) on task_team = %p\n",
3596 __kmp_gtid_from_thread(this_thr), task_team));
3600 kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
3601 &task_team->tt.tt_unfinished_threads),
3603 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
3609 (
"__kmp_task_team_wait: Master T#%d deactivating task_team %p: " 3610 "setting active to false, setting local and team's pointer to NULL\n",
3611 __kmp_gtid_from_thread(this_thr), task_team));
3612 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
3613 task_team->tt.tt_found_proxy_tasks == TRUE);
3614 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3615 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
3616 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
3619 TCW_PTR(this_thr->th.th_task_team, NULL);
3628 void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
int gtid) {
3629 std::atomic<kmp_uint32> *spin = RCAST(
3630 std::atomic<kmp_uint32> *,
3631 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
3633 KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
3636 KMP_FSYNC_SPIN_INIT(spin, NULL);
3638 kmp_flag_32 spin_flag(spin, 0U);
3639 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
3640 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
3643 KMP_FSYNC_SPIN_PREPARE(RCAST(
void *, spin));
3646 if (TCR_4(__kmp_global.g.g_done)) {
3647 if (__kmp_global.g.g_abort)
3648 __kmp_abort_thread();
3654 KMP_FSYNC_SPIN_ACQUIRED(RCAST(
void *, spin));
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went really bad...
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
3677 if (thread_data->td.td_deque == NULL) {
3681 (
"__kmp_give_task: thread %d has no queue while giving task %p.\n",
3686 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3687 TASK_DEQUE_SIZE(thread_data->td)) {
3690 (
"__kmp_give_task: queue is full while giving task %p to thread %d.\n",
3695 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3698 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3699 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3700 TASK_DEQUE_SIZE(thread_data->td)) {
3702 __kmp_realloc_task_deque(thread, thread_data);
3707 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3709 if (TCR_4(thread_data->td.td_deque_ntasks) >=
3710 TASK_DEQUE_SIZE(thread_data->td)) {
3711 KA_TRACE(30, (
"__kmp_give_task: queue is full while giving task %p to " 3717 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
3718 goto release_and_exit;
3720 __kmp_realloc_task_deque(thread, thread_data);
3726 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
3728 thread_data->td.td_deque_tail =
3729 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
3730 TCW_4(thread_data->td.td_deque_ntasks,
3731 TCR_4(thread_data->td.td_deque_ntasks) + 1);
3734 KA_TRACE(30, (
"__kmp_give_task: successfully gave task %p to thread %d.\n",
3738 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
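// Note (added) on the "pass" checks above: on pass p the routine only grows a
// deque whose current size is still below p * INITIAL_TASK_DEQUE_SIZE, so
// repeated failed hand-off attempts relax the limit gradually instead of
// inflating one victim's deque right away. Worked example assuming
// INITIAL_TASK_DEQUE_SIZE is 256:
//   pass 1: a full 256-entry deque (ratio 1 >= 1) is skipped, not grown;
//   pass 2: the same deque (ratio 1 < 2) is reallocated before retrying.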
3759 static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3760 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
3761 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3762 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
3763 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
3765 taskdata->td_flags.complete = 1;
3767 if (taskdata->td_taskgroup)
3768 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
3772 KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
3775 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3776 kmp_int32 children = 0;
3780 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
3781 KMP_DEBUG_ASSERT(children >= 0);
3784 KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
3787 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
3788 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3789 kmp_info_t *thread = __kmp_threads[gtid];
3791 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3792 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
3797 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
3800 __kmp_release_deps(gtid, taskdata);
3801 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
3813 KMP_DEBUG_ASSERT(ptask != NULL);
3814 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3816 10, (
"__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3818 __kmp_assert_valid_gtid(gtid);
3819 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3821 __kmp_first_top_half_finish_proxy(taskdata);
3822 __kmp_second_top_half_finish_proxy(taskdata);
3823 __kmp_bottom_half_finish_proxy(gtid, ptask);
3826 (
"__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3838 KMP_DEBUG_ASSERT(ptask != NULL);
3839 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3843 (
"__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
3846 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3848 __kmp_first_top_half_finish_proxy(taskdata);
3852 kmp_team_t *team = taskdata->td_team;
3853 kmp_int32 nthreads = team->t.t_nproc;
3858 kmp_int32 start_k = 0;
3860 kmp_int32 k = start_k;
3864 thread = team->t.t_threads[k];
3865 k = (k + 1) % nthreads;
3871 }
while (!__kmp_give_task(thread, k, ptask, pass));
3873 __kmp_second_top_half_finish_proxy(taskdata);
3877 (
"__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
3884 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
3885 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
3886 td->td_allow_completion_event.ed.task = task;
3887 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
3889 return &td->td_allow_completion_event;
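// Illustrative user-level sketch (added; not from this file): the event
// returned above backs the OpenMP "detach" clause, and __kmp_fulfill_event
// below is what omp_fulfill_event() ultimately reaches. Assumed shape of the
// user code, with start_async_io() a hypothetical helper:
//
//   // omp_event_handle_t ev;
//   // #pragma omp task detach(ev)
//   // { start_async_io(buffer, ev); }  // completion callback later calls
//   //                                  // omp_fulfill_event(ev)
//   // Finishing the task body does not complete the task; the fulfill does.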
3892 void __kmp_fulfill_event(kmp_event_t *event) {
3893 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
3894 kmp_task_t *ptask =
event->ed.task;
3895 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3896 bool detached =
false;
3897 int gtid = __kmp_get_gtid();
3902 __kmp_acquire_tas_lock(&event->lock, gtid);
3903 if (taskdata->td_flags.proxy == TASK_PROXY) {
3909 if (UNLIKELY(ompt_enabled.enabled))
3910 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
3913 event->type = KMP_EVENT_UNINITIALIZED;
3914 __kmp_release_tas_lock(&event->lock, gtid);
3920 if (UNLIKELY(ompt_enabled.enabled))
3921 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
3925 kmp_team_t *team = taskdata->td_team;
3926 kmp_info_t *thread = __kmp_get_thread();
3927 if (thread->th.th_team == team) {
3945 kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
3947 kmp_taskdata_t *taskdata;
3948 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
3949 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
3950 size_t shareds_offset;
3953 KA_TRACE(10, (
"__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
3955 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
3957 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
3958 task_size = taskdata_src->td_size_alloc;
3961 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
3964 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
3966 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
3968 KMP_MEMCPY(taskdata, taskdata_src, task_size);
3970 task = KMP_TASKDATA_TO_TASK(taskdata);
3973 taskdata->td_task_id = KMP_GEN_TASK_ID();
3974 if (task->shareds != NULL) {
3975 shareds_offset = (
char *)task_src->shareds - (
char *)taskdata_src;
3976 task->shareds = &((
char *)taskdata)[shareds_offset];
3977 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (
sizeof(
void *) - 1)) ==
3980 taskdata->td_alloc_thread = thread;
3981 taskdata->td_parent = parent_task;
3983 taskdata->td_taskgroup = parent_task->td_taskgroup;
3986 if (taskdata->td_flags.tiedness == TASK_TIED)
3987 taskdata->td_last_tied = taskdata;
3991 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
3992 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
3993 if (parent_task->td_taskgroup)
3994 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
3997 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
3998 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4002 (
"__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4003 thread, taskdata, taskdata->td_parent));
4005 if (UNLIKELY(ompt_enabled.enabled))
4006 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4015 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
4022 class kmp_taskloop_bounds_t {
4024 const kmp_taskdata_t *taskdata;
4025 size_t lower_offset;
4026 size_t upper_offset;
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
4036 kmp_taskloop_bounds_t(kmp_task_t *_task,
const kmp_taskloop_bounds_t &bounds)
4037 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4038 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4039 size_t get_lower_offset()
const {
return lower_offset; }
4040 size_t get_upper_offset()
const {
return upper_offset; }
4041 kmp_uint64 get_lb()
const {
4043 #if defined(KMP_GOMP_COMPAT) 4045 if (!taskdata->td_flags.native) {
4046 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4049 if (taskdata->td_size_loop_bounds == 4) {
4050 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4051 retval = (kmp_int64)*lb;
4053 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4054 retval = (kmp_int64)*lb;
4058 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4059 #endif // defined(KMP_GOMP_COMPAT) 4062 kmp_uint64 get_ub()
const {
4064 #if defined(KMP_GOMP_COMPAT) 4066 if (!taskdata->td_flags.native) {
4067 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4070 if (taskdata->td_size_loop_bounds == 4) {
4071 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4072 retval = (kmp_int64)*ub;
4074 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4075 retval = (kmp_int64)*ub;
4079 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4080 #endif // defined(KMP_GOMP_COMPAT) 4083 void set_lb(kmp_uint64 lb) {
4084 #if defined(KMP_GOMP_COMPAT) 4086 if (!taskdata->td_flags.native) {
4087 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4090 if (taskdata->td_size_loop_bounds == 4) {
4091 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4092 *lower = (kmp_uint32)lb;
4094 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4095 *lower = (kmp_uint64)lb;
4099 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4100 #endif // defined(KMP_GOMP_COMPAT) 4102 void set_ub(kmp_uint64 ub) {
4103 #if defined(KMP_GOMP_COMPAT) 4105 if (!taskdata->td_flags.native) {
4106 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4109 if (taskdata->td_size_loop_bounds == 4) {
4110 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4111 *upper = (kmp_uint32)ub;
4113 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4114 *upper = (kmp_uint64)ub;
4118 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4119 #endif // defined(KMP_GOMP_COMPAT) 4138 void __kmp_taskloop_linear(
ident_t *loc,
int gtid, kmp_task_t *task,
4139 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4140 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4141 kmp_uint64 grainsize, kmp_uint64 extras,
4148 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4149 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4151 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4152 kmp_uint64 lower = task_bounds.get_lb();
4153 kmp_uint64 upper = task_bounds.get_ub();
4155 kmp_info_t *thread = __kmp_threads[gtid];
4156 kmp_taskdata_t *current_task = thread->th.th_current_task;
4157 kmp_task_t *next_task;
4158 kmp_int32 lastpriv = 0;
4160 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
4161 KMP_DEBUG_ASSERT(num_tasks > extras);
4162 KMP_DEBUG_ASSERT(num_tasks > 0);
4163 KA_TRACE(20, (
"__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, " 4164 "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4165 gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
4169 for (i = 0; i < num_tasks; ++i) {
4170 kmp_uint64 chunk_minus_1;
4172 chunk_minus_1 = grainsize - 1;
4174 chunk_minus_1 = grainsize;
4177 upper = lower + st * chunk_minus_1;
4178 if (i == num_tasks - 1) {
4181 KMP_DEBUG_ASSERT(upper == *ub);
4182 if (upper == ub_glob)
4184 }
else if (st > 0) {
4185 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4186 if ((kmp_uint64)st > ub_glob - upper)
4189 KMP_DEBUG_ASSERT(upper + st < *ub);
4190 if (upper - ub_glob < (kmp_uint64)(-st))
4194 next_task = __kmp_task_dup_alloc(thread, task);
4195 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4196 kmp_taskloop_bounds_t next_task_bounds =
4197 kmp_taskloop_bounds_t(next_task, task_bounds);
4200 next_task_bounds.set_lb(lower);
4201 if (next_taskdata->td_flags.native) {
4202 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4204 next_task_bounds.set_ub(upper);
4206 if (ptask_dup != NULL)
4208 ptask_dup(next_task, task, lastpriv);
4210 (
"__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " 4211 "upper %lld stride %lld, (offsets %p %p)\n",
4212 gtid, i, next_task, lower, upper, st,
4213 next_task_bounds.get_lower_offset(),
4214 next_task_bounds.get_upper_offset()));
4216 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4219 __kmp_omp_task(gtid, next_task,
true);
4224 __kmp_task_start(gtid, task, current_task);
4226 __kmp_task_finish<false>(gtid, task, current_task);
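// Worked example (added) for the splitting invariant asserted above
// (tc == num_tasks * grainsize + extras, with num_tasks > extras):
//   tc = 10 iterations, num_tasks = 3  =>  grainsize = 10 / 3 = 3, extras = 1,
//   so the generated chunks are 4, 3 and 3 iterations (the first "extras"
//   chunks carry one extra iteration), and 4 + 3 + 3 == 10.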
4231 typedef struct __taskloop_params {
4238 kmp_uint64 num_tasks;
4239 kmp_uint64 grainsize;
4242 kmp_uint64 num_t_min;
4246 } __taskloop_params_t;
4248 void __kmp_taskloop_recur(
ident_t *,
int, kmp_task_t *, kmp_uint64 *,
4249 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4250 kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
4257 int __kmp_taskloop_task(
int gtid,
void *ptask) {
4258 __taskloop_params_t *p =
4259 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4260 kmp_task_t *task = p->task;
4261 kmp_uint64 *lb = p->lb;
4262 kmp_uint64 *ub = p->ub;
4263 void *task_dup = p->task_dup;
4265 kmp_int64 st = p->st;
4266 kmp_uint64 ub_glob = p->ub_glob;
4267 kmp_uint64 num_tasks = p->num_tasks;
4268 kmp_uint64 grainsize = p->grainsize;
4269 kmp_uint64 extras = p->extras;
4270 kmp_uint64 tc = p->tc;
4271 kmp_uint64 num_t_min = p->num_t_min;
4273 void *codeptr_ra = p->codeptr_ra;
4276 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4277 KMP_DEBUG_ASSERT(task != NULL);
4278 KA_TRACE(20, (
"__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize" 4279 " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
4280 gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
4283 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4284 if (num_tasks > num_t_min)
4285 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4286 grainsize, extras, tc, num_t_min,
4292 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4293 grainsize, extras, tc,
4299 KA_TRACE(40, (
"__kmp_taskloop_task(exit): T#%d\n", gtid));
4320 void __kmp_taskloop_recur(
ident_t *loc,
int gtid, kmp_task_t *task,
4321 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4322 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4323 kmp_uint64 grainsize, kmp_uint64 extras,
4324 kmp_uint64 tc, kmp_uint64 num_t_min,
4329 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4330 KMP_DEBUG_ASSERT(task != NULL);
4331 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4332 KA_TRACE(20, (
"__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize" 4333 " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
4334 gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
4336 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4337 kmp_uint64 lower = *lb;
4338 kmp_info_t *thread = __kmp_threads[gtid];
4340 kmp_task_t *next_task;
4341 size_t lower_offset =
4342 (
char *)lb - (
char *)task;
4343 size_t upper_offset =
4344 (
char *)ub - (
char *)task;
4346 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
4347 KMP_DEBUG_ASSERT(num_tasks > extras);
4348 KMP_DEBUG_ASSERT(num_tasks > 0);
4351 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4352 kmp_uint64 gr_size0 = grainsize;
4353 kmp_uint64 n_tsk0 = num_tasks >> 1;
4354 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
4355 if (n_tsk0 <= extras) {
4358 ext1 = extras - n_tsk0;
4359 tc0 = gr_size0 * n_tsk0;
4364 tc1 = grainsize * n_tsk1;
4367 ub0 = lower + st * (tc0 - 1);
4371 next_task = __kmp_task_dup_alloc(thread, task);
4373 *(kmp_uint64 *)((
char *)next_task + lower_offset) = lb1;
4374 if (ptask_dup != NULL)
4375 ptask_dup(next_task, task, 0);
4380 kmp_taskdata_t *current_task = thread->th.th_current_task;
4381 thread->th.th_current_task = taskdata->td_parent;
4382 kmp_task_t *new_task =
4383 __kmpc_omp_task_alloc(loc, gtid, 1, 3 *
sizeof(
void *),
4384 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4386 thread->th.th_current_task = current_task;
4387 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4388 p->task = next_task;
4389 p->lb = (kmp_uint64 *)((
char *)next_task + lower_offset);
4390 p->ub = (kmp_uint64 *)((
char *)next_task + upper_offset);
4391 p->task_dup = task_dup;
4393 p->ub_glob = ub_glob;
4394 p->num_tasks = n_tsk1;
4395 p->grainsize = grainsize;
4398 p->num_t_min = num_t_min;
4400 p->codeptr_ra = codeptr_ra;
4405 __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4407 __kmp_omp_task(gtid, new_task,
true);
4411 if (n_tsk0 > num_t_min)
4412 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4413 ext0, tc0, num_t_min,
4419 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4420 gr_size0, ext0, tc0,
4426 KA_TRACE(40, (
"__kmpc_taskloop_recur(exit): T#%d\n", gtid));
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  __kmp_assert_valid_gtid(gtid);
4452 #if OMPT_SUPPORT && OMPT_OPTIONAL 4453 OMPT_STORE_RETURN_ADDRESS(gtid);
4455 __kmpc_taskgroup(loc, gtid);
4460 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4463 kmp_uint64 lower = task_bounds.get_lb();
4464 kmp_uint64 upper = task_bounds.get_ub();
4465 kmp_uint64 ub_glob = upper;
4466 kmp_uint64 num_tasks = 0, extras = 0;
4467 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
4468 kmp_info_t *thread = __kmp_threads[gtid];
4469 kmp_taskdata_t *current_task = thread->th.th_current_task;
4471 KA_TRACE(20, (
"__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, " 4472 "grain %llu(%d), dup %p\n",
4473 gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
4477 tc = upper - lower + 1;
4478 }
else if (st < 0) {
4479 tc = (lower - upper) / (-st) + 1;
4481 tc = (upper - lower) / st + 1;
4484 KA_TRACE(20, (
"__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
4486 __kmp_task_start(gtid, task, current_task);
4488 __kmp_task_finish<false>(gtid, task, current_task);
4492 #if OMPT_SUPPORT && OMPT_OPTIONAL 4493 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
4494 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
4495 if (ompt_enabled.ompt_callback_work) {
4496 ompt_callbacks.ompt_callback(ompt_callback_work)(
4497 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
4498 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4502 if (num_tasks_min == 0)
4505 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
4511 grainsize = thread->th.th_team_nproc * 10;
4514 if (grainsize > tc) {
4519 num_tasks = grainsize;
4520 grainsize = tc / num_tasks;
4521 extras = tc % num_tasks;
4525 if (grainsize > tc) {
4530 num_tasks = tc / grainsize;
4532 grainsize = tc / num_tasks;
4533 extras = tc % num_tasks;
4537 KMP_ASSERT2(0,
"unknown scheduling of taskloop");
4539 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
4540 KMP_DEBUG_ASSERT(num_tasks > extras);
4541 KMP_DEBUG_ASSERT(num_tasks > 0);
4547 taskdata->td_flags.task_serial = 1;
4548 taskdata->td_flags.tiedness = TASK_TIED;
4550 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4551 grainsize, extras, tc,
4553 OMPT_GET_RETURN_ADDRESS(0),
4558 }
else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
4559 KA_TRACE(20, (
"__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" 4560 "(%lld), grain %llu, extras %llu\n",
4561 gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
4562 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4563 grainsize, extras, tc, num_tasks_min,
4565 OMPT_GET_RETURN_ADDRESS(0),
4569 KA_TRACE(20, (
"__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" 4570 "(%lld), grain %llu, extras %llu\n",
4571 gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
4572 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4573 grainsize, extras, tc,
4575 OMPT_GET_RETURN_ADDRESS(0),
4580 #if OMPT_SUPPORT && OMPT_OPTIONAL 4581 if (ompt_enabled.ompt_callback_work) {
4582 ompt_callbacks.ompt_callback(ompt_callback_work)(
4583 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
4584 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4589 #if OMPT_SUPPORT && OMPT_OPTIONAL 4590 OMPT_STORE_RETURN_ADDRESS(gtid);
4592 __kmpc_end_taskgroup(loc, gtid);
4594 KA_TRACE(20, (
"__kmpc_taskloop(exit): T#%d\n", gtid));