#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"
#include "tsan_annotations.h"

/* forward declaration */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
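// Debug-only support for the tied-task stack; the routines below are compiled
// in only when BUILD_TIED_TASK_STACK is defined.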
#ifdef BUILD_TIED_TASK_STACK

static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up stack_top if we need to pop from the previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
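// __kmp_free_task_stack: free all extra blocks of an (empty) tied-task stack.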
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
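// __kmp_push_task_stack: push a tied task onto the per-thread task stack,
// growing the stack by another block when the current block fills up.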
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif // BUILD_TIED_TASK_STACK
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled, and
    // checking the last one is enough since it is a descendant of all others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
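// __kmp_realloc_task_deque: double the size of a thread's task deque and copy
// the existing entries over; must be called with the deque lock held.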
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
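// __kmp_push_task: add a task to the encountering thread's deque. Returns
// TASK_NOT_PUSHED when the task must instead be executed immediately (serial
// team, tasking disabled, or a full deque with throttling enabled).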
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // A hidden helper task is pushed to the deque of its shadow thread.
  if (taskdata->td_flags.hidden_helper) {
    gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    thread = __kmp_threads[gtid];
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  // Wake hidden helper threads up if they're sleeping
  if (taskdata->td_flags.hidden_helper) {
    __kmp_hidden_helper_worker_thread_signal();
  }

  return TASK_SUCCESSFULLY_PUSHED;
}
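// __kmp_pop_current_task_from_thread: restore the parent task as the thread's
// current task when a team ends.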
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
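// __kmp_push_current_task_to_thread: set up the implicit task of a new team as
// the current task of the given thread.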
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // current task of the thread is a parent of the new just created implicit
  // tasks of new team
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
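// __kmp_task_start: bookkeeping for a task starting execution; suspend the
// current task and make the started task the thread's current task.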
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
}
// __ompt_task_start: build and trigger task-begin event.
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  // let OMPT know that we're about to run this task
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}
// __ompt_task_finish: build and trigger final task-schedule event.
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }

    // let OMPT know that we're returning to the callee task
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
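// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution; the templated helper also records OMPT frame information.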
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}
#if OMPT_SUPPORT
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to cleanup dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // cleanup dephash of the implicit task
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
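// __kmp_task_finish: bookkeeping to do when a task finishes execution: release
// dependences, update child counts, handle detachable tasks, and free the task
// (and any freeable ancestors) when possible.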
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done, to be continued possibly by other thread, do
      // not free it now
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
        // task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // bookkeeping for resuming task: note that tasking_ser implies task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that resumed task is passed as argument
  }

  /* If the tasks' destructor thunk flag has been set, we need to invoke the
     destructor thunk that has been generated by the compiler. The code is
     placed here, since at this point other tasks might have been released
     hence overlapping the destructor invocations with some other work in the
     released tasks. */
  if (taskdata->td_flags.destructors_thunk) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool detach = false;
  if (taskdata->td_flags.detachable == TASK_DETACHABLE) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // event hasn't been fulfilled yet. Try to detach task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // task finished execution
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

        // For a detached task that is not completed, report detach status;
        // locking avoids a race with ompt_task_late_fulfill.
        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);

        // no access to taskdata after this point!
        // __kmp_fulfill_event might free taskdata at any time from now
        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        detach = true;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  if (!detach) {
    taskdata->td_flags.complete = 1; // mark the task as completed

    // This is not a detached task, we are done here
    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);

    // Only need to keep track of count if team parallel and tasking not
    // serialized, or task is detachable and event has already been fulfilled
    if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
        taskdata->td_flags.detachable == TASK_DETACHABLE ||
        taskdata->td_flags.hidden_helper) {
      // Predecrement simulated by "- 1" calculation
      children =
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
      KMP_DEBUG_ASSERT(children >= 0);
      if (taskdata->td_taskgroup)
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
      __kmp_release_deps(gtid, taskdata);
    } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
      // if we found proxy tasks there could exist a dependency chain
      // with the proxy task as origin
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be marked as 0 after __kmp_release_deps has been
    // called. Otherwise, if a task is executed immediately from the
    // release_deps code, the flag will be reset to 1 again by that code.
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Free this task and then ancestor tasks if they have no children.
  // Restore th_current_task first so that an asynchronous inquiry into the
  // runtime does not see the freed task as the current task.
  thread->th.th_current_task = resumed_task;
  if (!detach)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  __kmp_assert_valid_gtid(gtid);
  // this routine will provide task to resume
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
  }
}
#if OMPT_SUPPORT
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_complete_if0: report that a task has completed execution
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task,
                           NULL); // Not sure how to find task to resume

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_init_implicit_task: initialize the appropriate fields in the implicit
// task for a given thread; the implicit task ICVs are set up elsewhere.
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this init first time thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: don't need to deallocate implicit task
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
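// __kmp_free_implicit_task: release the dephash of an implicit task when the
// task is destroyed.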
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
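// Round up a size to a multiple of val (a power of two); used to insert
// padding between structures co-allocated with a single malloc() call.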
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
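// __kmp_task_alloc: allocate the kmp_taskdata_t and kmp_task_t for a new
// explicit task in a single block laid out as
//   [kmp_taskdata_t][kmp_task_t + private vars][padding][shareds pointers]
// so that KMP_TASKDATA_TO_TASK/KMP_TASK_TO_TASKDATA can convert between the
// two headers and task->shareds points into the same allocation.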
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_info_t *encountering_thread = thread;
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();

      // A hidden helper task encountered by a regular thread is pushed to the
      // corresponding hidden helper (shadow) thread.
      if (!KMP_HIDDEN_HELPER_THREAD(gtid)) {
        thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
      }
    } else {
      // If hidden helper tasks are not enabled, reset the flag to FALSE.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // Untied task encountered causes the TSC algorithm to check the entire
    // deque of the victim thread.
    KMP_CHECK_UPDATE(
        encountering_thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could become so later;
  // do the tasking setup now.
  if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
      flags->hidden_helper) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    /* are we running in a sequential parallel or tskm_immediate_exec... we need
       tasking support enabled */
    if ((encountering_thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized;
         set up a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(
          encountering_thread, team,
          1); // 1 indicates setup the current team regardless of nthreads
      encountering_thread->th.th_task_team =
          team->t.t_task_team[encountering_thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = encountering_thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, encountering_thread);
      kmp_int32 tid = encountering_thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(encountering_thread, thread_data);
      }
    }

    if (flags->proxy == TASK_PROXY &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Calculate shared structure offset including padding after kmp_task_t struct
  // to align pointers in shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

  // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
      encountering_thread, shareds_offset + sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
      encountering_thread, shareds_offset + sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // AC: Always start with 0 part id

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = encountering_thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags.tiedness = flags->tiedness;
  taskdata->td_flags.final = flags->final;
  taskdata->td_flags.merged_if0 = flags->merged_if0;
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
  taskdata->td_flags.proxy = flags->proxy;
  taskdata->td_flags.detachable = flags->detachable;
  taskdata->td_flags.hidden_helper = flags->hidden_helper;
  taskdata->encountering_gtid = gtid;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized, so implicit parallel region
  // tasks are not left until program termination to execute.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  taskdata->td_flags.native = flags->native;

  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // start at one because counts current task and children
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

  // Only need to keep track of child task counts if team parallel and tasking
  // not serialized, or if it is a proxy, detachable, or hidden helper task
  if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
      flags->hidden_helper ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks since
    // implicit ones are not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks to be executed
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

  return task;
}
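// __kmpc_omp_task_alloc: thin wrapper over __kmp_task_alloc called by the
// compiler; flags arrive packed in a kmp_int32 and are reinterpreted as
// kmp_tasking_flags_t.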
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  if (__kmp_enable_hidden_helper) {
    auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
    input_flags.hidden_helper = TRUE;
  }

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task,
                                            kmp_int32 naffins,
                                            kmp_task_affinity_info_t *affin_list) {
  return 0; // affinity information is currently ignored by the runtime
}
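// __kmp_invoke_task: run the task routine for a task and then finish it;
// already-completed proxy tasks only run their bottom-half completion here.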
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;
  int discard = 0 /* false */;
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
               taskdata->td_flags.complete == 1)) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }

  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }

  // Decrement the counter of hidden helper tasks to be executed
  if (taskdata->td_flags.hidden_helper) {
    // Hidden helper tasks can only be executed by hidden helper threads
    KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
    KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
  }

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    ANNOTATE_HAPPENS_AFTER(task);
    __kmp_task_start(gtid, task, current_task);
  }

  // if cancellation has been enabled for this run, discard cancelled tasks
  if (UNLIKELY(__kmp_omp_cancellation)) {
    thread = __kmp_threads[gtid];
    kmp_team_t *this_team = thread->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif
      // this task belongs to a task group and we need to cancel it
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  if (!discard) {
    if (taskdata->td_flags.tiedness == TASK_UNTIED) {
      taskdata->td_last_tied = current_task->td_last_tied;
      KMP_DEBUG_ASSERT(taskdata->td_last_tied);
    }
#if KMP_STATS_ENABLED
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED

    // OMPT task begin
    if (UNLIKELY(ompt_enabled.enabled))
      __ompt_task_start(task, current_task, gtid);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    kmp_uint64 cur_time;
    kmp_int32 kmp_itt_count_task =
        __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
        current_task->td_flags.tasktype == TASK_IMPLICIT;
    if (kmp_itt_count_task) {
      thread = __kmp_threads[gtid];
      // Time outer level explicit task on barrier for adjusting imbalance time
      if (thread->th.th_bar_arrive_time)
        cur_time = __itt_get_timestamp();
      else
        kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
    }
    KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif

#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }
    KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
      // Barrier imbalance - adjust arrive time with the task duration
      thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
    KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
    KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif
  }

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else
      __kmp_task_finish<false>(gtid, task, current_task);
  }

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
}
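// __kmpc_omp_task_parts: schedule a thread-switchable task for execution; if
// it cannot be queued it is executed immediately.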
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          parent ? &(parent->ompt_task_info.task_data) : &task_data,
          parent ? &(parent->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it. */
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(10,
           ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
            "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
            gtid, loc_ref, new_taskdata));

  ANNOTATE_HAPPENS_BEFORE(new_task);
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return TASK_CURRENT_NOT_QUEUED;
}
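// __kmp_omp_task: schedule a non-thread-switchable task; proxy tasks and tasks
// that cannot be pushed are executed immediately (optionally serialized).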
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it. */
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
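// __kmpc_omp_task: compiler entry point wrapping __kmp_omp_task; also emits
// the OMPT task-create event for deferred tasks.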
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_data_t task_data = ompt_data_none;
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            parent ? &(parent->ompt_task_info.task_data) : &task_data,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            &(new_taskdata->ompt_task_info.task_data),
            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an UNTIED task.
      // Scheduling back to the parent task.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
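// __kmp_omp_taskloop_task: like __kmpc_omp_task but takes an explicit return
// address for the OMPT task-create event (used by the taskloop implementation).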
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          parent ? &(parent->ompt_task_info.task_data) : &task_data,
          parent ? &(parent->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          codeptr_ra);
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
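// __kmpc_omp_taskwait: wait until all tasks generated by the current task have
// completed; the templated helper also drives the OMPT sync-region callbacks.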
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }

      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    // Debugger: the taskwait is active; store location and encountering thread
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (UNLIKELY(itt_sync_obj != NULL))
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If a hidden helper task was encountered, we must enable the wait here.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    if (UNLIKELY(itt_sync_obj != NULL))
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children

    // Debugger: the taskwait is completed; location remains, thread is negated
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    ANNOTATE_HAPPENS_AFTER(taskdata);
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

// __kmpc_omp_taskwait: wait until all tasks generated by the current task are
// complete
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
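// __kmpc_omp_taskyield: switch to a different task, executing queued tasks if
// tasking is enabled for the current team.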
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
    // Should we model this as a task wait or not?
    // Debugger: the taskwait is active; store location and encountering thread
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (UNLIKELY(itt_sync_obj != NULL))
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);

    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
          __kmp_execute_tasks_32(
              thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
        }
      }
    }
    if (UNLIKELY(itt_sync_obj != NULL))
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);

    // Debugger: the taskwait is completed; location remains, thread is negated
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
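// Task reduction support: per-taskgroup arrays of kmp_taskred_data_t describe
// each reduction item; private copies are either allocated eagerly (one
// cache-line-aligned block per thread) or lazily on first use.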
typedef struct kmp_taskred_flags {
  /*! 1 - use lazy alloc/init (e.g. big objects, #tasks < #threads) */
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31;
} kmp_taskred_flags_t;

template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);
template <>
void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
                                             kmp_task_red_input_t &src) {
  item.reduce_orig = NULL; // non-NULL reduce_orig means new interface used
}
template <>
void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
                                            kmp_taskred_input_t &src) {
  if (src.reduce_orig != NULL) {
    item.reduce_orig = src.reduce_orig;
  } else {
    item.reduce_orig = src.reduce_shar;
  }
}

template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);
template <>
void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
                                           size_t offset) {
  ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
}
template <>
void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
                                          size_t offset) {
  ((void (*)(void *, void *))item.reduce_init)(
      (char *)(item.reduce_priv) + offset, item.reduce_orig);
}
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_uint32 nth = thread->th.th_team_nproc;
  kmp_taskred_data_t *arr;

  // check input data just in case
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // round the size up to cache line per thread-specific item
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = data[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].flags = data[i].flags;
    arr[i].reduce_comb = data[i].reduce_comb;
    arr[i].reduce_init = data[i].reduce_init;
    arr[i].reduce_fini = data[i].reduce_fini;
    __kmp_assign_orig<T>(arr[i], data[i]);
    if (!arr[i].flags.lazy_priv) {
      // allocate cache-line aligned block and fill it with zeros
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        // initialize all thread-specific items
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // only allocate space for pointers now; objects will be lazily
      // allocated/initialized if/when requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
// Copy task reduction data (except for the shared pointers).
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  kmp_taskred_data_t *arr;
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  // threads will share private copies, thunk routines, sizes, flags, etc.
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
// __kmpc_task_reduction_get_th_data: get the thread-specific location for a
// task reduction item, lazily allocating/initializing it when needed.
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do

  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
  kmp_int32 num = tg->reduce_num_data;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if we got some thread-specific location as parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate thread-specific object lazily
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
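// __kmp_task_reduction_clean: drop this thread's copy of the reduction
// bookkeeping without touching private data other threads may still use.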
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  __kmp_thread_free(th, tg->reduce_data);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
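// __kmp_task_reduction_modifier_init: initialize task reduction for a parallel
// or worksharing construct; one thread builds the common reduction data and
// the others copy it.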
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    // first initialize own data, then make a copy other threads can use
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    // fini counters should be 0 at this point
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for task reduction initialization
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
// Compiler entry points for the old and new task reduction interfaces.
void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_task_red_input_t *)data);
}

void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
                                   void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_taskred_input_t *)data);
}

// __kmpc_task_reduction_modifier_fini: finalize task reduction for a parallel
// or worksharing construct by ending the taskgroup formed at init.
void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  __kmpc_end_taskgroup(loc, gtid);
}
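// __kmpc_taskgroup: start a new (nested) taskgroup.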
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
2522 __kmp_assert_valid_gtid(gtid);
2523 kmp_info_t *thread = __kmp_threads[gtid];
2524 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2525 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2526 int thread_finished = FALSE;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif
  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2545 KMP_DEBUG_ASSERT(taskgroup != NULL);
2546 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2548 if (__kmp_tasking_mode != tskm_immediate_exec) {
2550 taskdata->td_taskwait_counter += 1;
2551 taskdata->td_taskwait_ident = loc;
2552 taskdata->td_taskwait_thread = gtid + 1;
2556 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
2557 if (UNLIKELY(itt_sync_obj != NULL))
2558 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks)) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif
2591 if (UNLIKELY(itt_sync_obj != NULL))
2592 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
2593 KMP_FSYNC_ACQUIRED(taskdata);
2596 KMP_DEBUG_ASSERT(taskgroup->count == 0);
  if (taskgroup->reduce_data != NULL) { // need to reduce?
    int cnt;
    void *reduce_data;
    kmp_team_t *t = thread->th.th_team;
    kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
    // check whether the <priv> data of the first reduction variable is shared
    // with the team, i.e. whether this is a task_reduction modifier
    void *priv0 = arr[0].reduce_priv;
    if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
        ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on parallel
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
      if (cnt == thread->th.th_team_nproc - 1) { // last thread of the team
        __kmp_task_reduction_fini(thread, taskgroup);
        // cleanup fields in the team structure
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
      } else {
        // we did not reduce anything, just clean our own data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
                   NULL &&
               ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on worksharing
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
      if (cnt == thread->th.th_team_nproc - 1) { // last thread of the team
        __kmp_task_reduction_fini(thread, taskgroup);
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
      } else {
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else {
      // finishing task reduction on taskgroup
      __kmp_task_reduction_fini(thread, taskgroup);
    }
  }
  // Restore the parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
  ANNOTATE_HAPPENS_AFTER(taskdata);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // wrap index
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow the tail task to be removed.
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
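// __kmp_steal_task: steal a task from another thread's deque. Steals come from
// the head (oldest entries) while owners pop from the tail, preserving
// locality for the owner; if the head task is not allowed by the task
// scheduling constraint, the deque is scanned for any allowed task.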
2736 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
2737 kmp_task_team_t *task_team,
2738 std::atomic<kmp_int32> *unfinished_threads,
2739 int *thread_finished,
2740 kmp_int32 is_constrained) {
2742 kmp_taskdata_t *taskdata;
2743 kmp_taskdata_t *current;
2744 kmp_thread_data_t *victim_td, *threads_data;
2746 kmp_int32 victim_tid;
2748 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2750 threads_data = task_team->tt.tt_threads_data;
2751 KMP_DEBUG_ASSERT(threads_data != NULL);
2753 victim_tid = victim_thr->th.th_info.ds.ds_tid;
2754 victim_td = &threads_data[victim_tid];
  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));
2762 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
2763 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " 2764 "task_team=%p ntasks=%d head=%u tail=%u\n",
2765 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2766 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2767 victim_td->td.td_deque_tail));
2771 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2773 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
2776 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2777 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " 2778 "task_team=%p ntasks=%d head=%u tail=%u\n",
2779 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2780 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2784 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2785 current = __kmp_threads[gtid]->th.th_current_task;
2786 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow stealing the victim's head task.
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int i;
    // walk through the victim's deque trying to steal any allowed task
    target = victim_td->td.td_deque_head;
    taskdata = NULL;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found victim task
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate to steal found
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    // shift the remaining tasks in the deque left by one slot
    for (i = i + 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim as a finished victim. This must be done before
    // releasing the lock, or else other threads (starting with the master
    // victim) might be prematurely released from the barrier.
    kmp_int32 count;
    count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
                ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
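// __kmp_execute_tasks_template: the central task-scheduling loop shared by all
// waiting contexts (barriers, taskwait, taskgroup). A thread first drains its
// own deque, then tries the last victim it stole from, then random victims,
// re-checking the wait flag between tasks so it can leave as soon as the
// condition it is waiting on becomes true.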
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
2879 kmp_task_team_t *task_team = thread->th.th_task_team;
2880 kmp_thread_data_t *threads_data;
2882 kmp_info_t *other_thread;
2883 kmp_taskdata_t *current_task = thread->th.th_current_task;
2884 std::atomic<kmp_int32> *unfinished_threads;
2885 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
2886 tid = thread->th.th_info.ds.ds_tid;
2888 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2889 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
2891 if (task_team == NULL || current_task == NULL)
  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));
2898 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2899 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2901 KMP_DEBUG_ASSERT(threads_data != NULL);
2903 nthreads = task_team->tt.tt_nproc;
2904 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2905 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
2906 task_team->tt.tt_hidden_helper_task_encountered);
2907 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
2913 if (use_own_tasks) {
2914 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2916 if ((task == NULL) && (nthreads > 1)) {
2920 if (victim_tid == -2) {
2921 victim_tid = threads_data[tid].td.td_deque_last_stolen;
2924 other_thread = threads_data[victim_tid].td.td_thr;
2926 if (victim_tid != -1) {
2928 }
else if (!new_victim) {
2934 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
2935 if (victim_tid >= tid) {
2939 other_thread = threads_data[victim_tid].td.td_thr;
2949 if ((__kmp_tasking_mode == tskm_task_teams) &&
2950 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2951 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
2954 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
2955 other_thread->th.th_sleep_loc);
2968 task = __kmp_steal_task(other_thread, gtid, task_team,
2969 unfinished_threads, thread_finished,
2973 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
2974 threads_data[tid].td.td_deque_last_stolen = victim_tid;
2981 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
2990 #if USE_ITT_BUILD && USE_ITT_NOTIFY 2991 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2992 if (itt_sync_obj == NULL) {
2994 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2996 __kmp_itt_task_starting(itt_sync_obj);
2999 __kmp_invoke_task(gtid, task, current_task);
3001 if (itt_sync_obj != NULL)
3002 __kmp_itt_task_finished(itt_sync_obj);
3009 if (flag == NULL || (!final_spin && flag->done_check())) {
3012 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3016 if (thread->th.th_task_team == NULL) {
3019 KMP_YIELD(__kmp_library == library_throughput);
3022 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3023 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned " 3024 "other tasks, restart\n",
3035 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
3039 if (!*thread_finished) {
3042 count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
3043 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec " 3044 "unfinished_threads to %d task_team=%p\n",
3045 gtid, count, task_team));
3046 *thread_finished = TRUE;
3054 if (flag != NULL && flag->done_check()) {
3057 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3065 if (thread->th.th_task_team == NULL) {
3067 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3073 if (nthreads == 1 &&
3074 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks))
3078 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
template <bool C, bool S>
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
                                     kmp_flag_32<false, false> *, int,
                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                  kmp_flag_64<false, true> *,
                                                  int,
                                                  int *USE_ITT_BUILD_ARG(void *),
                                                  kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                  kmp_flag_64<true, false> *,
                                                  int,
                                                  int *USE_ITT_BUILD_ARG(void *),
                                                  kmp_int32);
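// __kmp_enable_tasking: one-time setup when the first task of a region is
// encountered. Allocates the per-thread deque array for the task team and, if
// blocktime is finite, wakes threads that may already be sleeping at the
// barrier so they can start executing tasks.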
3133 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3134 kmp_info_t *this_thr) {
3135 kmp_thread_data_t *threads_data;
3136 int nthreads, i, is_init_thread;
3138 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3139 __kmp_gtid_from_thread(this_thr)));
3141 KMP_DEBUG_ASSERT(task_team != NULL);
3142 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3144 nthreads = task_team->tt.tt_nproc;
3145 KMP_DEBUG_ASSERT(nthreads > 0);
3146 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3149 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(20, ("__kmp_enable_tasking(exit): T#%d: threads array already "
                  "set up.\n",
                  __kmp_gtid_from_thread(this_thr)));
    return;
  }
3159 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3160 KMP_DEBUG_ASSERT(threads_data != NULL);
3162 if (__kmp_tasking_mode == tskm_task_teams &&
3163 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3167 for (i = 0; i < nthreads; i++) {
3168 volatile void *sleep_loc;
3169 kmp_info_t *thread = threads_data[i].td.td_thr;
3171 if (i == this_thr->th.th_info.ds.ds_tid) {
3180 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3182 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3183 __kmp_gtid_from_thread(this_thr),
3184 __kmp_gtid_from_thread(thread)));
3185 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
3187 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3188 __kmp_gtid_from_thread(this_thr),
3189 __kmp_gtid_from_thread(thread)));
  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
// Free list of task_team data structures.
static kmp_task_team_t *__kmp_free_task_teams = NULL;
// Lock protecting the task team free list.
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
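// __kmp_alloc_task_deque: allocate the initial fixed-size deque
// (INITIAL_TASK_DEQUE_SIZE entries) for one thread of a task team and reset
// its "last stolen" victim to none.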
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none"
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque; entries are zeroed by __kmp_allocate().
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}

static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
3292 kmp_thread_data_t **threads_data_p;
3293 kmp_int32 nthreads, maxthreads;
3294 int is_init_thread = FALSE;
3296 if (TCR_4(task_team->tt.tt_found_tasks)) {
3301 threads_data_p = &task_team->tt.tt_threads_data;
3302 nthreads = task_team->tt.tt_nproc;
3303 maxthreads = task_team->tt.tt_max_threads;
3308 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3310 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3312 kmp_team_t *team = thread->th.th_team;
3315 is_init_thread = TRUE;
3316 if (maxthreads < nthreads) {
3318 if (*threads_data_p != NULL) {
3319 kmp_thread_data_t *old_data = *threads_data_p;
3320 kmp_thread_data_t *new_data = NULL;
        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data to have more elements than the current array
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // initialize the task stacks for the newly added threads
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
        // Install the new data and free the old data
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Initial allocation; entries are zeroed by __kmp_allocate().
        ANNOTATE_IGNORE_WRITES_BEGIN();
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
        // initialize the task stacks for each thread
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
3369 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3373 for (i = 0; i < nthreads; i++) {
3374 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3375 thread_data->td.td_thr = team->t.t_threads[i];
3377 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3381 thread_data->td.td_deque_last_stolen = -1;
3386 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3389 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3390 return is_init_thread;
3396 static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3397 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3398 if (task_team->tt.tt_threads_data != NULL) {
3400 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3401 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3403 __kmp_free(task_team->tt.tt_threads_data);
3404 task_team->tt.tt_threads_data = NULL;
3406 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
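// Task teams are recycled through the global free list __kmp_free_task_teams,
// guarded by __kmp_task_team_lock, so fork/join-heavy programs do not pay a
// fresh allocation on every parallel region.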
3413 static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3415 kmp_task_team_t *task_team = NULL;
3418 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
3419 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3421 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3423 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3424 if (__kmp_free_task_teams != NULL) {
3425 task_team = __kmp_free_task_teams;
3426 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3427 task_team->tt.tt_next = NULL;
3429 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3432 if (task_team == NULL) {
3433 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating " 3434 "task team for team %p\n",
3435 __kmp_gtid_from_thread(thread), team));
3438 task_team = (kmp_task_team_t *)__kmp_allocate(
sizeof(kmp_task_team_t));
3439 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // suppress race detection on the synchronization flags in debug mode
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
3457 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3458 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3459 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3461 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3462 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3463 TCW_4(task_team->tt.tt_active, TRUE);
3465 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p " 3466 "unfinished_threads init'd to %d\n",
3467 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3468 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3475 void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3476 KA_TRACE(20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
3477 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3480 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3482 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3483 task_team->tt.tt_next = __kmp_free_task_teams;
3484 TCW_PTR(__kmp_free_task_teams, task_team);
3486 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
void __kmp_reap_task_teams(void) {
3495 kmp_task_team_t *task_team;
3497 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3499 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3500 while ((task_team = __kmp_free_task_teams) != NULL) {
3501 __kmp_free_task_teams = task_team->tt.tt_next;
3502 task_team->tt.tt_next = NULL;
3505 if (task_team->tt.tt_threads_data != NULL) {
3506 __kmp_free_task_threads_data(task_team);
3508 __kmp_free(task_team);
3510 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
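// __kmp_wait_to_unref_task_teams: spin (with yields) until every thread in the
// thread pool has dropped its th_task_team reference, waking sleeping threads
// when necessary, so that queued task teams can be reaped safely.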
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done;

  KMP_INIT_YIELD(spins);
3530 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3531 thread = thread->th.th_next_pool) {
3535 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3536 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3537 __kmp_gtid_from_thread(thread)));
3542 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3543 thread->th.th_task_team = NULL;
3550 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to " 3551 "unreference task_team\n",
3552 __kmp_gtid_from_thread(thread)));
3554 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3555 volatile void *sleep_loc;
3557 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3561 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3562 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3563 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
3572 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
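// __kmp_task_team_setup: run by the master at fork time. Task teams are kept
// in a two-slot array indexed by th_task_state parity so the "next" task team
// can be created or reset while workers may still be draining the current one
// at the barrier.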
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
3579 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3585 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3586 (always || team->t.t_nproc > 1)) {
3587 team->t.t_task_team[this_thr->th.th_task_state] =
3588 __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }
3604 if (team->t.t_nproc > 1) {
3605 int other_team = 1 - this_thr->th.th_task_state;
3606 if (team->t.t_task_team[other_team] == NULL) {
3607 team->t.t_task_team[other_team] =
3608 __kmp_allocate_task_team(this_thr, team);
3609 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d created second new " 3610 "task_team %p for team %d at parity=%d\n",
3611 __kmp_gtid_from_thread(this_thr),
3612 team->t.t_task_team[other_team],
3613 ((team != NULL) ? team->t.t_id : -1), other_team));
3616 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3617 if (!task_team->tt.tt_active ||
3618 team->t.t_nproc != task_team->tt.tt_nproc) {
3619 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3620 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3621 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3622 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3624 TCW_4(task_team->tt.tt_active, TRUE);
3628 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d reset next task_team " 3629 "%p for team %d at parity=%d\n",
3630 __kmp_gtid_from_thread(this_thr),
3631 team->t.t_task_team[other_team],
3632 ((team != NULL) ? team->t.t_id : -1), other_team));
3640 if (this_thr == __kmp_hidden_helper_main_thread) {
3641 for (
int i = 0; i < 2; ++i) {
3642 kmp_task_team_t *task_team = team->t.t_task_team[i];
3643 if (KMP_TASKING_ENABLED(task_team)) {
3646 __kmp_enable_tasking(task_team, this_thr);
3647 for (
int j = 0; j < task_team->tt.tt_nproc; ++j) {
3648 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
3649 if (thread_data->td.td_deque == NULL) {
3650 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // Toggle the th_task_state field, to switch which task_team this thread
  // refers to.
  this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);

  // It is now safe to propagate the task team pointer from the team struct to
  // the current thread.
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
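// __kmp_task_team_wait: called by the master thread at the join barrier. When
// asked to wait it blocks until tt_unfinished_threads reaches zero, then marks
// the task team inactive and clears its own th_task_team pointer so the team
// can be recycled at the next fork.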
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
3688 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
3690 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3691 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
3693 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may still be executing tasks; only the master checks
      // the termination condition to limit contention on the counter.
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
3711 (
"__kmp_task_team_wait: Master T#%d deactivating task_team %p: " 3712 "setting active to false, setting local and team's pointer to NULL\n",
3713 __kmp_gtid_from_thread(this_thr), task_team));
3714 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
3715 task_team->tt.tt_found_proxy_tasks == TRUE);
3716 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3717 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
3718 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
3721 TCW_PTR(this_thr->th.th_task_team, NULL);
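// __kmp_tasking_barrier: used only when __kmp_tasking_mode == tskm_extra_barrier.
// Executes pending tasks before a regular or join barrier by spinning in
// execute_tasks until the task team's unfinished_threads counter reaches zero.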
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_INIT(spin, NULL);
#endif /* USE_ITT_BUILD */
  kmp_flag_32<false, false> spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */
}
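// __kmp_give_task: try to push a completed proxy task onto the deque of the
// thread with the given tid. Returns false if that thread has no deque or its
// deque is already too large relative to the current "pass"; the caller then
// tries another thread, so deques are grown only gradually.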
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3768 kmp_task_team_t *task_team = taskdata->td_task_team;
  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If the task_team is NULL something went sour, return false
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
  if (thread_data->td.td_deque == NULL) {
    // There is no queue in this thread; go find another one.
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));
    // if this deque is bigger than the pass ratio give a chance to another
    // thread
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      // expand the deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  } else {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));
      // if this deque is bigger than the pass ratio give a chance to another
      // thread
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // lock is held here; push the task and bump the counters
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
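// Proxy-task completion is split into two "top halves" and a "bottom half":
// the first top half marks the proxy complete inside its taskgroup, the
// second detaches it from its parent's child counts, and the bottom half
// releases dependences and frees the task once its imaginary children are gone.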
3861 static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3862 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
3863 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3864 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
3865 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
3867 taskdata->td_flags.complete = 1;
3869 if (taskdata->td_taskgroup)
3870 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
3874 KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks);
3877 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;
  // Predecrement simulated by "- 1" calculation
  children =
      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);
  // Remove the imaginary children
  KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks);
}
3889 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
3890 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3891 kmp_info_t *thread = __kmp_threads[gtid];
3893 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1); // top half must be done
  // Wait for the top half to finish; this spin should be brief.
  while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0)
    ;
3902 __kmp_release_deps(gtid, taskdata);
3903 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
3915 KMP_DEBUG_ASSERT(ptask != NULL);
3916 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3918 10, (
"__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
3920 __kmp_assert_valid_gtid(gtid);
3921 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3923 __kmp_first_top_half_finish_proxy(taskdata);
3924 __kmp_second_top_half_finish_proxy(taskdata);
3925 __kmp_bottom_half_finish_proxy(gtid, ptask);
3928 (
"__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
3940 KMP_DEBUG_ASSERT(ptask != NULL);
3941 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3945 (
"__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
3948 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3950 __kmp_first_top_half_finish_proxy(taskdata);
3954 kmp_team_t *team = taskdata->td_team;
3955 kmp_int32 nthreads = team->t.t_nproc;
  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we are just linearly trying to find a thread
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;
    // we did a full pass through all the threads
    if (k == start_k)
      pass = pass << 1;
  } while (!__kmp_give_task(thread, k, ptask, pass));
3975 __kmp_second_top_half_finish_proxy(taskdata);
3979 (
"__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
3986 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
3987 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
3988 td->td_allow_completion_event.ed.task = task;
3989 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
3991 return &td->td_allow_completion_event;
3994 void __kmp_fulfill_event(kmp_event_t *event) {
3995 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
3996 kmp_task_t *ptask = event->ed.task;
3997 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
    bool detached = false;
    int gtid = __kmp_get_gtid();
4004 __kmp_acquire_tas_lock(&event->lock, gtid);
4005 if (taskdata->td_flags.proxy == TASK_PROXY) {
4011 if (UNLIKELY(ompt_enabled.enabled))
4012 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4015 event->type = KMP_EVENT_UNINITIALIZED;
4016 __kmp_release_tas_lock(&event->lock, gtid);
4022 if (UNLIKELY(ompt_enabled.enabled))
4023 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4027 kmp_team_t *team = taskdata->td_team;
4028 kmp_info_t *thread = __kmp_get_thread();
4029 if (thread->th.th_team == team) {
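// __kmp_task_dup_alloc: allocate and populate a copy of the given task. Used
// by the taskloop implementation to stamp out one task per chunk; the clone
// gets its own task id and shareds pointer but inherits the source's routine,
// flags, and parent/taskgroup bookkeeping.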
4047 kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4049 kmp_taskdata_t *taskdata;
4050 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4051 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4052 size_t shareds_offset;
4055 KA_TRACE(10, (
"__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4057 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4059 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4060 task_size = taskdata_src->td_size_alloc;
4063 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4066 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4068 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4070 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4072 task = KMP_TASKDATA_TO_TASK(taskdata);
4075 taskdata->td_task_id = KMP_GEN_TASK_ID();
4076 if (task->shareds != NULL) {
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
4082 taskdata->td_alloc_thread = thread;
4083 taskdata->td_parent = parent_task;
4085 taskdata->td_taskgroup = parent_task->td_taskgroup;
4088 if (taskdata->td_flags.tiedness == TASK_TIED)
4089 taskdata->td_last_tied = taskdata;
4093 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4094 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4095 if (parent_task->td_taskgroup)
4096 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4099 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4100 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}
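// ----------------------------------------------------------------------------
// Taskloop support: the pattern task is cloned once per chunk (see
// __kmp_task_dup_alloc above); kmp_taskloop_bounds_t below rewrites each
// clone's loop bounds, and the __kmp_taskloop_* routines schedule the clones
// either linearly or by recursive halving.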
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
4124 class kmp_taskloop_bounds_t {
4126 const kmp_taskdata_t *taskdata;
4127 size_t lower_offset;
4128 size_t upper_offset;
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
4138 kmp_taskloop_bounds_t(kmp_task_t *_task,
const kmp_taskloop_bounds_t &bounds)
4139 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4140 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4141 size_t get_lower_offset()
const {
return lower_offset; }
4142 size_t get_upper_offset()
const {
return upper_offset; }
4143 kmp_uint64 get_lb()
const {
4145 #if defined(KMP_GOMP_COMPAT) 4147 if (!taskdata->td_flags.native) {
4148 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4151 if (taskdata->td_size_loop_bounds == 4) {
4152 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4153 retval = (kmp_int64)*lb;
4155 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4156 retval = (kmp_int64)*lb;
4160 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4161 #endif // defined(KMP_GOMP_COMPAT) 4164 kmp_uint64 get_ub()
const {
4166 #if defined(KMP_GOMP_COMPAT) 4168 if (!taskdata->td_flags.native) {
4169 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4172 if (taskdata->td_size_loop_bounds == 4) {
4173 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4174 retval = (kmp_int64)*ub;
4176 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4177 retval = (kmp_int64)*ub;
4181 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4182 #endif // defined(KMP_GOMP_COMPAT) 4185 void set_lb(kmp_uint64 lb) {
4186 #if defined(KMP_GOMP_COMPAT) 4188 if (!taskdata->td_flags.native) {
4189 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4192 if (taskdata->td_size_loop_bounds == 4) {
4193 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4194 *lower = (kmp_uint32)lb;
4196 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4197 *lower = (kmp_uint64)lb;
4201 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4202 #endif // defined(KMP_GOMP_COMPAT) 4204 void set_ub(kmp_uint64 ub) {
4205 #if defined(KMP_GOMP_COMPAT) 4207 if (!taskdata->td_flags.native) {
4208 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4211 if (taskdata->td_size_loop_bounds == 4) {
4212 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4213 *upper = (kmp_uint32)ub;
4215 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4216 *upper = (kmp_uint64)ub;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
#endif // defined(KMP_GOMP_COMPAT)
  }
};

// Execute part of the taskloop by splitting it into num_tasks chunks, each
// wrapped in a clone of the pattern task.
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4252 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4254 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4255 kmp_uint64 lower = task_bounds.get_lb();
4256 kmp_uint64 upper = task_bounds.get_ub();
4258 kmp_info_t *thread = __kmp_threads[gtid];
4259 kmp_taskdata_t *current_task = thread->th.th_current_task;
4260 kmp_task_t *next_task;
4261 kmp_int32 lastpriv = 0;
4264 tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
4265 KMP_DEBUG_ASSERT(num_tasks > extras);
4266 KMP_DEBUG_ASSERT(num_tasks > 0);
4267 KA_TRACE(20, (
"__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, " 4268 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4269 gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4270 ub_glob, st, task_dup));
4273 for (i = 0; i < num_tasks; ++i) {
4274 kmp_uint64 chunk_minus_1;
4276 chunk_minus_1 = grainsize - 1;
4278 chunk_minus_1 = grainsize;
4281 upper = lower + st * chunk_minus_1;
4285 if (i == num_tasks - 1) {
4288 KMP_DEBUG_ASSERT(upper == *ub);
4289 if (upper == ub_glob)
4291 }
else if (st > 0) {
4292 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4293 if ((kmp_uint64)st > ub_glob - upper)
4296 KMP_DEBUG_ASSERT(upper + st < *ub);
4297 if (upper - ub_glob < (kmp_uint64)(-st))
4301 next_task = __kmp_task_dup_alloc(thread, task);
4302 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4303 kmp_taskloop_bounds_t next_task_bounds =
4304 kmp_taskloop_bounds_t(next_task, task_bounds);
4307 next_task_bounds.set_lb(lower);
4308 if (next_taskdata->td_flags.native) {
4309 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4311 next_task_bounds.set_ub(upper);
4313 if (ptask_dup != NULL)
4315 ptask_dup(next_task, task, lastpriv);
4317 (
"__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " 4318 "upper %lld stride %lld, (offsets %p %p)\n",
4319 gtid, i, next_task, lower, upper, st,
4320 next_task_bounds.get_lower_offset(),
4321 next_task_bounds.get_upper_offset()));
4323 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4326 __kmp_omp_task(gtid, next_task,
true);
4331 __kmp_task_start(gtid, task, current_task);
4333 __kmp_task_finish<false>(gtid, task, current_task);
4338 typedef struct __taskloop_params {
4345 kmp_uint64 num_tasks;
4346 kmp_uint64 grainsize;
4348 kmp_int64 last_chunk;
4350 kmp_uint64 num_t_min;
4354 } __taskloop_params_t;
4356 void __kmp_taskloop_recur(
ident_t *,
int, kmp_task_t *, kmp_uint64 *,
4357 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4358 kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
int __kmp_taskloop_task(int gtid, void *ptask) {
4367 __taskloop_params_t *p =
4368 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4369 kmp_task_t *task = p->task;
4370 kmp_uint64 *lb = p->lb;
4371 kmp_uint64 *ub = p->ub;
4372 void *task_dup = p->task_dup;
4374 kmp_int64 st = p->st;
4375 kmp_uint64 ub_glob = p->ub_glob;
4376 kmp_uint64 num_tasks = p->num_tasks;
4377 kmp_uint64 grainsize = p->grainsize;
4378 kmp_uint64 extras = p->extras;
4379 kmp_int64 last_chunk = p->last_chunk;
4380 kmp_uint64 tc = p->tc;
4381 kmp_uint64 num_t_min = p->num_t_min;
4383 void *codeptr_ra = p->codeptr_ra;
4386 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4387 KMP_DEBUG_ASSERT(task != NULL);
4389 (
"__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize" 4390 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4391 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4394 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4395 if (num_tasks > num_t_min)
4396 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4397 grainsize, extras, last_chunk, tc, num_t_min,
4403 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4404 grainsize, extras, last_chunk, tc,
4410 KA_TRACE(40, (
"__kmp_taskloop_task(exit): T#%d\n", gtid));
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
4442 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4443 KMP_DEBUG_ASSERT(task != NULL);
4444 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4446 (
"__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize" 4447 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4448 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4450 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4451 kmp_uint64 lower = *lb;
4452 kmp_info_t *thread = __kmp_threads[gtid];
4454 kmp_task_t *next_task;
4455 size_t lower_offset =
4456 (
char *)lb - (
char *)task;
4457 size_t upper_offset =
4458 (
char *)ub - (
char *)task;
4461 tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
4462 KMP_DEBUG_ASSERT(num_tasks > extras);
4463 KMP_DEBUG_ASSERT(num_tasks > 0);
4466 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4467 kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4468 kmp_uint64 gr_size0 = grainsize;
4469 kmp_uint64 n_tsk0 = num_tasks >> 1;
4470 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
4471 if (last_chunk < 0) {
4473 last_chunk1 = last_chunk;
4474 tc0 = grainsize * n_tsk0;
4476 }
else if (n_tsk0 <= extras) {
4479 ext1 = extras - n_tsk0;
4480 tc0 = gr_size0 * n_tsk0;
4485 tc1 = grainsize * n_tsk1;
4488 ub0 = lower + st * (tc0 - 1);
4492 next_task = __kmp_task_dup_alloc(thread, task);
4494 *(kmp_uint64 *)((
char *)next_task + lower_offset) = lb1;
4495 if (ptask_dup != NULL)
4496 ptask_dup(next_task, task, 0);
4501 kmp_taskdata_t *current_task = thread->th.th_current_task;
4502 thread->th.th_current_task = taskdata->td_parent;
4503 kmp_task_t *new_task =
4504 __kmpc_omp_task_alloc(loc, gtid, 1, 3 *
sizeof(
void *),
4505 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4507 thread->th.th_current_task = current_task;
4508 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4509 p->task = next_task;
4510 p->lb = (kmp_uint64 *)((
char *)next_task + lower_offset);
4511 p->ub = (kmp_uint64 *)((
char *)next_task + upper_offset);
4512 p->task_dup = task_dup;
4514 p->ub_glob = ub_glob;
4515 p->num_tasks = n_tsk1;
4516 p->grainsize = grainsize;
4518 p->last_chunk = last_chunk1;
4520 p->num_t_min = num_t_min;
4522 p->codeptr_ra = codeptr_ra;
4527 __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4529 __kmp_omp_task(gtid, new_task,
true);
4533 if (n_tsk0 > num_t_min)
4534 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4535 ext0, last_chunk0, tc0, num_t_min,
4541 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4542 gr_size0, ext0, last_chunk0, tc0,
4548 KA_TRACE(40, (
"__kmp_taskloop_recur(exit): T#%d\n", gtid));
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
4555 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4556 KMP_DEBUG_ASSERT(task != NULL);
4558 #if OMPT_SUPPORT && OMPT_OPTIONAL 4559 OMPT_STORE_RETURN_ADDRESS(gtid);
4561 __kmpc_taskgroup(loc, gtid);
4566 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4569 kmp_uint64 lower = task_bounds.get_lb();
4570 kmp_uint64 upper = task_bounds.get_ub();
4571 kmp_uint64 ub_glob = upper;
4572 kmp_uint64 num_tasks = 0, extras = 0;
4573 kmp_int64 last_chunk =
4575 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
4576 kmp_info_t *thread = __kmp_threads[gtid];
4577 kmp_taskdata_t *current_task = thread->th.th_current_task;
4579 KA_TRACE(20, (
"__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, " 4580 "grain %llu(%d, %d), dup %p\n",
4581 gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }
4601 #if OMPT_SUPPORT && OMPT_OPTIONAL 4602 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
4603 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
4604 if (ompt_enabled.ompt_callback_work) {
4605 ompt_callbacks.ompt_callback(ompt_callback_work)(
4606 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
4607 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4611 if (num_tasks_min == 0)
4614 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
4620 grainsize = thread->th.th_team_nproc * 10;
4623 if (grainsize > tc) {
4628 num_tasks = grainsize;
4629 grainsize = tc / num_tasks;
4630 extras = tc % num_tasks;
4634 if (grainsize > tc) {
4640 num_tasks = (tc + grainsize - 1) / grainsize;
4641 last_chunk = tc - (num_tasks * grainsize);
4644 num_tasks = tc / grainsize;
4646 grainsize = tc / num_tasks;
4647 extras = tc % num_tasks;
4652 KMP_ASSERT2(0,
"unknown scheduling of taskloop");
4656 tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras));
4657 KMP_DEBUG_ASSERT(num_tasks > extras);
4658 KMP_DEBUG_ASSERT(num_tasks > 0);
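  // Illustrative arithmetic (assumed figures, not from the source): with
  // tc = 10 iterations and a grainsize clause of 3, num_tasks = 10 / 3 = 3,
  // the balanced grainsize becomes 10 / 3 = 3 and extras = 10 % 3 = 1, i.e.
  // one chunk of 4 iterations and two chunks of 3. The invariant asserted
  // above holds: tc == num_tasks * grainsize + extras (3 * 3 + 1 == 10).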
4664 taskdata->td_flags.task_serial = 1;
4665 taskdata->td_flags.tiedness = TASK_TIED;
4667 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4668 grainsize, extras, last_chunk, tc,
4670 OMPT_GET_RETURN_ADDRESS(0),
4675 }
else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
4676 KA_TRACE(20, (
"__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" 4677 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4678 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4680 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4681 grainsize, extras, last_chunk, tc, num_tasks_min,
4683 OMPT_GET_RETURN_ADDRESS(0),
4687 KA_TRACE(20, (
"__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu" 4688 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
4689 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
4691 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4692 grainsize, extras, last_chunk, tc,
4694 OMPT_GET_RETURN_ADDRESS(0),
4699 #if OMPT_SUPPORT && OMPT_OPTIONAL 4700 if (ompt_enabled.ompt_callback_work) {
4701 ompt_callbacks.ompt_callback(ompt_callback_work)(
4702 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
4703 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
4708 #if OMPT_SUPPORT && OMPT_OPTIONAL 4709 OMPT_STORE_RETURN_ADDRESS(gtid);
4711 __kmpc_end_taskgroup(loc, gtid);
4713 KA_TRACE(20, (
"__kmp_taskloop(exit): T#%d\n", gtid));
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}