14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
28#include "kmp_dispatch_hier.h"
32#include "ompt-specific.h"
35#include "ompd-specific.h"
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile =
nullptr;
44#define KMP_USE_PRCTL 0
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX
"alternative compiler support: yes";
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX
"API version: 5.0 (201611)";
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX
"lock type: run time selectable";
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
77kmp_info_t __kmp_monitor;
82void __kmp_cleanup(
void);
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *,
int tid,
86static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
87 kmp_internal_control_t *new_icvs,
89#if KMP_AFFINITY_SUPPORTED
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
93static void __kmp_do_serial_initialize(
void);
94void __kmp_fork_barrier(
int gtid,
int tid);
95void __kmp_join_barrier(
int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team,
int new_nproc,
97 kmp_internal_control_t *new_icvs,
ident_t *loc);
99#ifdef USE_LOAD_BALANCE
100static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc);
103static int __kmp_expand_threads(
int nNeed);
105static int __kmp_unregister_root_other_thread(
int gtid);
107static void __kmp_reap_thread(kmp_info_t *thread,
int is_root);
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
110void __kmp_resize_dist_barrier(kmp_team_t *team,
int old_nthreads,
112void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads);
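
/* Map the calling thread to its global thread id (gtid). Depending on
   __kmp_gtid_mode this uses thread-local data (mode >= 3), keyed TLS
   (mode >= 2), or an internal search over the registered threads' stack
   ranges in __kmp_threads. */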
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
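
/* Sanity check: report a fatal error if this thread's stack region overlaps
   the recorded stack of any other registered thread. Extensive checking is
   only done when __kmp_env_checks is enabled and the thread is not an uber
   (root) thread. */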
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
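
/* Print a storage-map line of the form "OMP storage map: p1 p2 size format"
   to kmp_err, optionally followed by per-page host-node (NUMA) placement when
   KMP_PRINT_DATA_PLACEMENT is enabled. */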
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
        /* This loop collates adjacent pages with the same host node. */
        do {
          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                             lastNode);
      }
    } else {
      __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
      __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                           (char *)p2 + (page_size - 1),
                           __kmp_get_host_node(p2));
    }
  }
  __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
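
/* printf-style warning helper; silent when warnings are turned off
   (__kmp_generate_warnings == kmp_warnings_off). */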
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
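
/* Abnormal termination: dump the debug buffer if present, unregister the
   library, and abort the whole process. */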
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill
  // them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;
  } else {
    __kmp_unregister_library();
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // If lpReserved is non-NULL the process is terminating and all threads
    // have already been killed; only clean up on an orderly unload.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
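
/* Ordered-section entry (__kmp_parallel_deo) and exit (__kmp_parallel_dxo)
   hooks. With BUILD_PARALLEL_ORDERED, a thread waits until
   team->t.t_ordered.dt.t_value equals its tid, and on exit passes the token
   on to (tid + 1) % nproc. */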
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
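
/* Claim a SINGLE construct: the first thread to advance team->t.t_construct
   past its own this_construct counter wins and returns nonzero; every other
   thread of the team returns zero. */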
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
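
/* Decide how many threads a new team may actually get, honoring the dynamic
   adjustment mode, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT (the contention
   group limit), and the capacity of the __kmp_threads array. Returns 1 when
   the region must be serialized. */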
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
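
/* Populate an allocated team: install the primary thread, allocate or reuse
   worker threads, seed their barrier counters from the team, and partition
   affinity places unless the workers belong to a teams construct that has
   not joined yet. */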
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
                   // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
                 // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
                 // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in the team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;

      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually joined the team yet. In this case, the places will be
    // re-partitioned in __kmp_fork_call once all teams workers arrive.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
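
/* Floating-point control helpers for x86: when __kmp_inherit_fp_control is
   set, capture the primary thread's x87 control word and MXCSR into the team,
   and reload them into the hardware when a worker's values differ. They are
   no-ops on other architectures. */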
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only copy the values into the team structure if they differ, to avoid
    // writing to a shared cache line unnecessarily.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
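
/* Execute a parallel region serially on the calling thread: push (or reuse) a
   serial team of size 1, bump its serialization level, and set up dispatch
   buffers and OMPT frames as if a team had been forked. */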
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; we need to allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // for the debugger
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  } else {
    /* this serialized team is already being used, that is fine, just add
       another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was copied

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
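
/* Fork a team for a parallel region. In the GNU (GOMP) entry path the
   function returns early and the caller invokes the microtask; in the Intel
   entry path the microtask is invoked via team->t.t_invoke() before
   returning. When only one thread is available the region is serialized.

   For orientation (an assumption about the compiler side, not code from this
   file): a user-level construct such as

       #pragma omp parallel num_threads(4)
       { body(); }

   is lowered by the compiler to roughly

       __kmpc_push_num_threads(&loc, gtid, 4);
       __kmpc_fork_call(&loc, 0, outlined_body);  // lands in __kmp_fork_call

   where outlined_body is the compiler-generated microtask. */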
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Assign affinity to root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();
  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  master_th->th.th_ident = loc;

  // Parallel region closely nested inside a teams construct:
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {
      // we are in serialized parallel
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        // need to decrement t_serialized for enquiry functions to work
        // correctly; will restore at join time
        parent_team->t.t_serialized--;
        return TRUE;
      }

      parent_team->t.t_pkfn = microtask;

#if OMPT_SUPPORT
      void **exit_frame_p;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was copied

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif
      // need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time
      parent_team->t.t_serialized--;

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
    }
#endif

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) {
      if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
        // only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        int old_proc = master_th->th.th_teams_size.nth;
        if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
            bp_dist_bar) {
          __kmp_resize_dist_barrier(parent_team, old_proc,
                                    master_set_numthreads);
          __kmp_add_threads_to_team(parent_team, master_set_numthreads);
        }
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
      }
      // Keep extra threads hot in the team for possible next parallels
      master_th->th.th_set_nproc = 0;
    }

    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }

    // Figure out the proc_bind policy for the nested parallel within teams
    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    // proc_bind_default means don't update
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
      proc_bind = proc_bind_false;
    } else {
      // No proc_bind clause specified; use current proc-bind-var
      if (proc_bind == proc_bind_default) {
        proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
      }
      /* else: The proc_bind policy was specified explicitly on the parallel
         clause; it overrides proc-bind-var for this region only. */
      // Figure the value of proc-bind-var for the child threads.
      if ((level + 1 < __kmp_nested_proc_bind.used) &&
          (__kmp_nested_proc_bind.bind_types[level + 1] !=
           master_th->th.th_current_task->td_icvs.proc_bind)) {
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
      }
    }
    KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
    // Need to change the bind-var ICV to correct value for each implicit task
    if (proc_bind_icv != proc_bind_default &&
        master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
      kmp_info_t **other_threads = parent_team->t.t_threads;
      for (i = 0; i < master_th->th.th_team_nproc; ++i) {
        other_threads[i]->th.th_current_task->td_icvs.proc_bind =
            proc_bind_icv;
      }
    }
    // Reset for next parallel region
    master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 // only report frames at level 1
        && master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    }
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      // create new stack stitching id before entering fork barrier
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(parent_team);
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)
      return TRUE;

    /* Invoke microtask for PRIMARY thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
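
  /* Determine how many threads the new team will get: nesting depth may force
     a single thread; otherwise take the requested count (or nproc-var) and
     let __kmp_reserve_threads() apply the dynamic mode and the various
     limits. */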
  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    enter_teams = ((ap == NULL && active_level == 0) ||
                   (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      if ((get__max_active_levels(master_th) == 1 &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team is created.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(10,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
        void **exit_frame_p;
        ompt_task_info_t *task_info;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was copied

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_frame_p
#endif
          );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }
          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        // team->t.t_pkfn was set above
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
            *argv++ = va_arg(kmp_va_deref(ap), void *);
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          }
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

#if OMPT_SUPPORT
        void **exit_frame_p;
        ompt_task_info_t *task_info;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was copied
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_frame_p
#endif
          );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      }
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        ompt_lw_taskteam_t lwt;
        __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                                return_address);

        lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
        __ompt_lw_taskteam_link(&lwt, master_th, 1);
      }
#endif
      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } // if (nthreads == 1)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var for this
    // parallel region
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind icv for the two teams construct forks.
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set primary thread's task team to team's task team. Unless this is the
    // hot team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of primary thread's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store primary thread's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);
#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    }
    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create new stack stitching id before entering fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep stack stitching id in the serialized parent_team; the current
      // team will be used for parallel inside the teams construct
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team,
                                   ompt_data_t *parallel_data, int flags,
                                   void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif
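
/* Join at the end of a parallel region: wait for the team in
   __kmp_internal_join(), notify ITT/OMPT observers, restore the primary
   thread's state and places, and handle the teams-construct special cases. */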
2334void __kmp_join_call(
ident_t *loc,
int gtid
2337 enum fork_context_e fork_context
2341 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2343 kmp_team_t *parent_team;
2344 kmp_info_t *master_th;
2348 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2351 master_th = __kmp_threads[gtid];
2352 root = master_th->th.th_root;
2353 team = master_th->th.th_team;
2354 parent_team = team->t.t_parent;
2356 master_th->th.th_ident = loc;
2359 void *team_microtask = (
void *)team->t.t_pkfn;
2363 if (ompt_enabled.enabled &&
2364 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2365 master_th->th.ompt_thread_info.state = ompt_state_overhead;
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in a teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // level was not incremented at the start of the teams construct,
        // so do it here - at the end of the teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // exiting a parallel inside teams: bump serialization so the next
        // __kmpc_end_serialized_parallel call can restore it
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
    return;
  }
  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // No barrier for internal teams at exit from the teams construct,
    // but there is a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after the join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
  } else {
    master_th->th.th_task_state = 0; // no tasking in teams, outside any parallel
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
2439 if (team->t.t_nproc > 1 &&
2440 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2441 team->t.b->update_num_threads(team->t.t_nproc);
2442 __kmp_add_threads_to_team(team, team->t.t_nproc);
2449 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2450 void *codeptr = team->t.ompt_team_info.master_return_address;
2455 if (team->t.t_active_level == 1 &&
2456 (!master_th->th.th_teams_microtask ||
2457 master_th->th.th_teams_size.nteams == 1)) {
2458 master_th->th.th_ident = loc;
2461 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2462 __kmp_forkjoin_frames_mode == 3)
2463 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2464 master_th->th.th_frame_time, 0, loc,
2465 master_th->th.th_team_nproc, 1);
2466 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2467 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2468 __kmp_itt_region_joined(gtid);
2472#if KMP_AFFINITY_SUPPORTED
2475 master_th->th.th_first_place = team->t.t_first_place;
2476 master_th->th.th_last_place = team->t.t_last_place;
2480 if (master_th->th.th_teams_microtask && !exit_teams &&
2481 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2482 team->t.t_level == master_th->th.th_teams_level + 1) {
2487 ompt_data_t ompt_parallel_data = ompt_data_none;
2488 if (ompt_enabled.enabled) {
2489 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2490 if (ompt_enabled.ompt_callback_implicit_task) {
2491 int ompt_team_size = team->t.t_nproc;
2492 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2493 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2494 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2496 task_info->frame.exit_frame = ompt_data_none;
2497 task_info->task_data = ompt_data_none;
2498 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2499 __ompt_lw_taskteam_unlink(master_th);
2504 team->t.t_active_level--;
2505 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2511 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2512 int old_num = master_th->th.th_team_nproc;
2513 int new_num = master_th->th.th_teams_size.nth;
2514 kmp_info_t **other_threads = team->t.t_threads;
2515 team->t.t_nproc = new_num;
2516 for (
int i = 0; i < old_num; ++i) {
2517 other_threads[i]->th.th_team_nproc = new_num;
2520 for (
int i = old_num; i < new_num; ++i) {
2522 KMP_DEBUG_ASSERT(other_threads[i]);
2523 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2524 for (
int b = 0; b < bs_last_barrier; ++b) {
2525 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2526 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2528 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2531 if (__kmp_tasking_mode != tskm_immediate_exec) {
2533 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2539 if (ompt_enabled.enabled) {
2540 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2541 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2549 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2550 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2552 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2557 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2559 if (!master_th->th.th_teams_microtask ||
2560 team->t.t_level > master_th->th.th_teams_level) {
2562 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2564 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2567 if (ompt_enabled.enabled) {
2568 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2569 if (ompt_enabled.ompt_callback_implicit_task) {
2570 int flags = (team_microtask == (
void *)__kmp_teams_master)
2572 : ompt_task_implicit;
2573 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2574 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2575 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2576 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2578 task_info->frame.exit_frame = ompt_data_none;
2579 task_info->task_data = ompt_data_none;
2583 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2585 __kmp_pop_current_task_from_thread(master_th);
2587 master_th->th.th_def_allocator = team->t.t_def_allocator;
2590 if (ompd_state & OMPD_ENABLE_BP)
2591 ompd_bp_parallel_end();
2593 updateHWFPControl(team);
2595 if (root->r.r_active != master_active)
2596 root->r.r_active = master_active;
2598 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2606 master_th->th.th_team = parent_team;
2607 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2608 master_th->th.th_team_master = parent_team->t.t_threads[0];
2609 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2612 if (parent_team->t.t_serialized &&
2613 parent_team != master_th->th.th_serial_team &&
2614 parent_team != root->r.r_root_team) {
2615 __kmp_free_team(root,
2616 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2617 master_th->th.th_serial_team = parent_team;
2620 if (__kmp_tasking_mode != tskm_immediate_exec) {
2621 if (master_th->th.th_task_state_top >
2623 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2625 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2626 master_th->th.th_task_state;
2627 --master_th->th.th_task_state_top;
2629 master_th->th.th_task_state =
2631 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2634 master_th->th.th_task_team =
2635 parent_team->t.t_task_team[master_th->th.th_task_state];
2637 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2638 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2645 master_th->th.th_current_task->td_flags.executing = 1;
2647 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2649#if KMP_AFFINITY_SUPPORTED
2650 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2651 __kmp_reset_root_init_mask(gtid);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
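// --- Illustrative sketch, not part of libomp: the push-only-when-the-level-
// --- changes pattern used by __kmp_save_internal_controls, shown with a
// --- standalone linked-list stack. All names below (SavedControls,
// --- ControlStack, save_controls) are hypothetical.
#include <cstdlib>

struct SavedControls {
  int serial_nesting_level; // nesting level this snapshot belongs to
  int nproc;                // one example ICV carried by the snapshot
  SavedControls *next;      // older snapshots
};

struct ControlStack {
  SavedControls *top = nullptr;
};

// Push a snapshot only if the top record is not already for this level.
static void save_controls(ControlStack &stack, int serialized, int nproc) {
  if (stack.top != nullptr &&
      stack.top->serial_nesting_level == serialized)
    return; // this nesting level already has a saved record
  SavedControls *rec = (SavedControls *)std::malloc(sizeof(SavedControls));
  rec->serial_nesting_level = serialized;
  rec->nproc = nproc;
  rec->next = stack.top;
  stack.top = rec;
}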
/* Change the number of threads used by subsequent parallel regions */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we no longer need.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case of an omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
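// --- Illustrative usage sketch (assumes a compiler with OpenMP support):
// --- omp_set_num_threads() feeds the nproc ICV maintained by
// --- __kmp_set_num_threads above; out-of-range requests are clamped there.
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_num_threads(4); // request 4 threads for subsequent parallel regions
#pragma omp parallel
  {
#pragma omp single
    std::printf("team size = %d\n", omp_get_num_threads());
  }
  return 0;
}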
/* Change max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Ignore the call and keep the last valid setting; a warning is issued
    // (if warnings are allowed, as controlled by KMP_WARNINGS).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT];
    // zero is allowed (implementation-defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, correct it to the upper limit
    // (implementation-defined behavior).
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Get max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
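// --- Illustrative usage sketch: the max-active-levels ICV set and read by the
// --- two routines above, driven through the standard OpenMP API. Assumes an
// --- OpenMP-enabled compiler; negative values are ignored with a warning, as
// --- in __kmp_set_max_active_levels.
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_max_active_levels(2); // allow two nested active parallel levels
  std::printf("max active levels = %d\n", omp_get_max_active_levels());
  return 0;
}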
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
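// --- Illustrative usage sketch: the nteams-var and teams-thread-limit-var
// --- ICVs kept in __kmp_nteams / __kmp_teams_thread_limit, driven through the
// --- OpenMP 5.1 API (assumes a runtime that provides these entry points).
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_num_teams(4);          // default number of teams for `teams`
  omp_set_teams_thread_limit(8); // per-team thread limit for `teams`
  std::printf("max teams = %d, teams thread limit = %d\n",
              omp_get_max_teams(), omp_get_teams_thread_limit());
  return 0;
}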
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
/* Change the def_sched_var ICV for the calling thread */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; correct it if needed.
  // Valid parameters fit in one of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore the chunk value in case of a bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: the chunk should be invalid to
      // indicate an unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore the chunk parameter for schedule(auto)
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Get the def_sched_var ICV values for the calling thread */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // the chunk was not set; signal that fact with a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
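// --- Illustrative usage sketch: the runtime-schedule ICV that
// --- __kmp_set_schedule/__kmp_get_schedule translate to and from the internal
// --- kmp_sch_* encodings. Assumes an OpenMP-enabled compiler.
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_schedule(omp_sched_dynamic, 16); // schedule(runtime) -> dynamic,16
  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk);
  std::printf("kind = %d, chunk = %d\n", (int)kind, chunk);
  return 0;
}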
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;
  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;
  if (thr->th.th_teams_microtask) {
    // in a teams region nested teams share the level; skip past the league
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }
  if (ii == level)
    return __kmp_tid_from_gtid(gtid);
  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }
  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;
  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;
  if (thr->th.th_teams_microtask) {
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }
  return team->t.t_nproc;
}
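// --- Illustrative usage sketch: the ancestor/team-size queries implemented by
// --- the two walks above, exercised with one level of nesting. Assumes an
// --- OpenMP-enabled compiler with nested parallelism allowed.
#include <omp.h>
#include <cstdio>

int main() {
  omp_set_max_active_levels(2);
#pragma omp parallel num_threads(2)
#pragma omp parallel num_threads(3)
  {
#pragma omp single
    std::printf("level=%d outer tid=%d outer size=%d inner size=%d\n",
                omp_get_level(), omp_get_ancestor_thread_num(1),
                omp_get_team_size(1), omp_get_team_size(2));
  }
  return 0;
}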
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  kmp_r_sched_t r_sched;
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced/greedy)
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided (iterative/analytical)
  } else { // static_chunked, dynamic_chunked, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK; // __kmp_chunk may never have been set
  } else {
    r_sched.chunk = __kmp_chunk;
  }
  return r_sched;
}
/* Allocate a new argv of length argc for the team */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if heap space was previously allocated for the args, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use the unused space in the cache line for the arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for the arguments on the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* set up the dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
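// --- Illustrative sketch, not part of libomp: the reallocate-and-copy pattern
// --- used by __kmp_reallocate_team_arrays, shown with a plain array of
// --- pointers. The helper name (grow_pointer_array) is hypothetical; it
// --- assumes new_capacity >= count.
#include <cstdlib>
#include <cstring>

// Grow an array of `count` pointers to `new_capacity` slots, preserving the
// existing entries; frees the old array and returns the new one.
static void **grow_pointer_array(void **old_arr, int count, int new_capacity) {
  void **new_arr = (void **)std::calloc(new_capacity, sizeof(void *));
  std::memcpy(new_arr, old_arr, count * sizeof(void *));
  std::free(old_arr);
  return new_arr;
}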
static kmp_internal_control_t __kmp_get_global_icvs(void) {
  // get the current state of the scheduling globals
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // serial_nesting_level (corresponds to th_team_serialized)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // whether blocktime was explicitly set
      __kmp_dflt_blocktime, // blocktime
      __kmp_dflt_team_nth, // nproc for the next parallel region
      __kmp_cg_max_nth, // thread_limit
      __kmp_dflt_max_active_levels, // max_active_levels
      r_sched, // runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0], // proc_bind
      __kmp_default_device, // default_device
      NULL // next
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably should be team->t.t_serialized for a proper fix
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;
  return gx_icvs;
}
3254static void __kmp_initialize_root(kmp_root_t *root) {
3256 kmp_team_t *root_team;
3257 kmp_team_t *hot_team;
3258 int hot_team_max_nth;
3259 kmp_r_sched_t r_sched =
3260 __kmp_get_schedule_global();
3261 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3262 KMP_DEBUG_ASSERT(root);
3263 KMP_ASSERT(!root->r.r_begin);
3266 __kmp_init_lock(&root->r.r_begin_lock);
3267 root->r.r_begin = FALSE;
3268 root->r.r_active = FALSE;
3269 root->r.r_in_parallel = 0;
3270 root->r.r_blocktime = __kmp_dflt_blocktime;
3271#if KMP_AFFINITY_SUPPORTED
3272 root->r.r_affinity_assigned = FALSE;
3277 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3280 __kmp_allocate_team(root,
3286 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3288 USE_NESTED_HOT_ARG(NULL)
3293 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3296 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3298 root->r.r_root_team = root_team;
3299 root_team->t.t_control_stack_top = NULL;
3302 root_team->t.t_threads[0] = NULL;
3303 root_team->t.t_nproc = 1;
3304 root_team->t.t_serialized = 1;
3306 root_team->t.t_sched.sched = r_sched.sched;
3309 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3310 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3314 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3317 __kmp_allocate_team(root,
3319 __kmp_dflt_team_nth_ub * 2,
3323 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3325 USE_NESTED_HOT_ARG(NULL)
3327 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3329 root->r.r_hot_team = hot_team;
3330 root_team->t.t_control_stack_top = NULL;
3333 hot_team->t.t_parent = root_team;
3336 hot_team_max_nth = hot_team->t.t_max_nproc;
3337 for (f = 0; f < hot_team_max_nth; ++f) {
3338 hot_team->t.t_threads[f] = NULL;
3340 hot_team->t.t_nproc = 1;
3342 hot_team->t.t_sched.sched = r_sched.sched;
3343 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3354static void __kmp_print_structure_team_accum(
3355 kmp_team_list_t list,
3356 kmp_team_p
const *team
3366 KMP_DEBUG_ASSERT(list != NULL);
3371 __kmp_print_structure_team_accum(list, team->t.t_parent);
3372 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3376 while (l->next != NULL && l->entry != team) {
3379 if (l->next != NULL) {
3385 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3391 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3392 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3420void __kmp_print_structure(
void) {
3422 kmp_team_list_t list;
3426 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3430 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3431 "Table\n------------------------------\n");
3434 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3435 __kmp_printf(
"%2d", gtid);
3436 if (__kmp_threads != NULL) {
3437 __kmp_printf(
" %p", __kmp_threads[gtid]);
3439 if (__kmp_root != NULL) {
3440 __kmp_printf(
" %p", __kmp_root[gtid]);
3447 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3449 if (__kmp_threads != NULL) {
3451 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3452 kmp_info_t
const *thread = __kmp_threads[gtid];
3453 if (thread != NULL) {
3454 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3455 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3456 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3457 __kmp_print_structure_team(
" Serial Team: ",
3458 thread->th.th_serial_team);
3459 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3460 __kmp_print_structure_thread(
" Primary: ",
3461 thread->th.th_team_master);
3462 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3463 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3464 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3465 __kmp_print_structure_thread(
" Next in pool: ",
3466 thread->th.th_next_pool);
3468 __kmp_print_structure_team_accum(list, thread->th.th_team);
3469 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3473 __kmp_printf(
"Threads array is not allocated.\n");
3477 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3479 if (__kmp_root != NULL) {
3481 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3482 kmp_root_t
const *root = __kmp_root[gtid];
3484 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3485 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3486 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3487 __kmp_print_structure_thread(
" Uber Thread: ",
3488 root->r.r_uber_thread);
3489 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3490 __kmp_printf(
" In Parallel: %2d\n",
3491 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3493 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3494 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3498 __kmp_printf(
"Ubers array is not allocated.\n");
3501 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3503 while (list->next != NULL) {
3504 kmp_team_p
const *team = list->entry;
3506 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3507 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3508 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3509 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3510 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3511 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3512 for (i = 0; i < team->t.t_nproc; ++i) {
3513 __kmp_printf(
" Thread %2d: ", i);
3514 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3516 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3522 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3524 __kmp_print_structure_thread(
"Thread pool: ",
3525 CCAST(kmp_info_t *, __kmp_thread_pool));
3526 __kmp_print_structure_team(
"Team pool: ",
3527 CCAST(kmp_team_t *, __kmp_team_pool));
3531 while (list != NULL) {
3532 kmp_team_list_item_t *item = list;
3534 KMP_INTERNAL_FREE(item);
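// --- Illustrative sketch, not part of libomp: the sorted, de-duplicated
// --- accumulation list that __kmp_print_structure_team_accum builds while
// --- walking teams, shown with plain ints instead of kmp_team_p pointers.
// --- All names below are hypothetical; ids are assumed to be non-negative.
#include <cstdlib>

struct IdListItem {
  int id;          // -1 marks the sentinel tail
  IdListItem *next;
};

// The list starts as a single sentinel node: {id = -1, next = nullptr}.
// Insert `id` in ascending order unless it is already present.
static void accumulate_id(IdListItem *list, int id) {
  IdListItem *l = list;
  while (l->next != nullptr && l->id != id)
    l = l->next;
  if (l->next != nullptr)
    return; // already in the list
  // walk again to find the sorted insertion point
  l = list;
  while (l->next != nullptr && l->id <= id)
    l = l->next;
  // insert before node l by copying l into a fresh node and overwriting l
  IdListItem *item = (IdListItem *)std::malloc(sizeof(IdListItem));
  item->id = l->id;
  item->next = l->next;
  l->id = id;
  l->next = item;
}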
// Primes used to seed the per-thread linear congruential generators.
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
/* Generate a random number from the thread-private seed */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
/* Initialize the thread-private random number generator */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
// Reclaim array entries for root threads that are already dead; returns the
// number of entries reclaimed.
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only roots that died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* Expand the threads array by at least nNeed entries; returns the number of
   entries added. The caller must hold __kmp_forkjoin_lock, so resizing
   __kmp_threads needs no additional protection even with foreign threads. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth, only by
  // __kmp_sys_max_nth (see the handling of KMP_DEVICE_THREAD_LIMIT).
  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute the expansion headroom to check whether we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array on a list; ongoing references to the old
  // array stay valid, and the list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the new kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
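// --- Illustrative sketch, not part of libomp: the capacity-growth rule used
// --- by __kmp_expand_threads -- double the capacity until the requested
// --- minimum is reached, clamping at a system maximum. The helper name
// --- (grow_capacity) is hypothetical.
static int grow_capacity(int current, int needed, int sys_max) {
  if (sys_max - current < needed)
    return current; // cannot satisfy the request; the caller gives up
  int required = current + needed;
  int capacity = current;
  if (capacity == 0)
    capacity = 1; // guard for this standalone sketch
  do {
    // double, but never overshoot the system maximum
    capacity = (capacity <= (sys_max >> 1)) ? (capacity << 1) : sys_max;
  } while (capacity < required);
  return capacity;
}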
3711int __kmp_register_root(
int initial_thread) {
3712 kmp_info_t *root_thread;
3716 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3717 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3734 capacity = __kmp_threads_capacity;
3735 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3742 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3743 capacity -= __kmp_hidden_helper_threads_num;
3747 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3748 if (__kmp_tp_cached) {
3749 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3750 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3751 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3753 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3763 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3766 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3767 gtid <= __kmp_hidden_helper_threads_num;
3770 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3771 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3772 "hidden helper thread: T#%d\n",
3778 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3781 for (gtid = __kmp_hidden_helper_threads_num + 1;
3782 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3786 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3787 KMP_ASSERT(gtid < __kmp_threads_capacity);
3792 TCW_4(__kmp_nth, __kmp_nth + 1);
3796 if (__kmp_adjust_gtid_mode) {
3797 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3798 if (TCR_4(__kmp_gtid_mode) != 2) {
3799 TCW_4(__kmp_gtid_mode, 2);
3802 if (TCR_4(__kmp_gtid_mode) != 1) {
3803 TCW_4(__kmp_gtid_mode, 1);
3808#ifdef KMP_ADJUST_BLOCKTIME
3811 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3812 if (__kmp_nth > __kmp_avail_proc) {
3813 __kmp_zero_bt = TRUE;
3819 if (!(root = __kmp_root[gtid])) {
3820 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3821 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3824#if KMP_STATS_ENABLED
3826 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3827 __kmp_stats_thread_ptr->startLife();
3828 KMP_SET_THREAD_STATE(SERIAL_REGION);
3831 __kmp_initialize_root(root);
3834 if (root->r.r_uber_thread) {
3835 root_thread = root->r.r_uber_thread;
3837 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3838 if (__kmp_storage_map) {
3839 __kmp_print_thread_storage_map(root_thread, gtid);
3841 root_thread->th.th_info.ds.ds_gtid = gtid;
3843 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3845 root_thread->th.th_root = root;
3846 if (__kmp_env_consistency_check) {
3847 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3850 __kmp_initialize_fast_memory(root_thread);
3854 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3855 __kmp_initialize_bget(root_thread);
3857 __kmp_init_random(root_thread);
3861 if (!root_thread->th.th_serial_team) {
3862 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3863 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3864 root_thread->th.th_serial_team = __kmp_allocate_team(
3869 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3871 KMP_ASSERT(root_thread->th.th_serial_team);
3872 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3873 root_thread->th.th_serial_team));
3876 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3878 root->r.r_root_team->t.t_threads[0] = root_thread;
3879 root->r.r_hot_team->t.t_threads[0] = root_thread;
3880 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3882 root_thread->th.th_serial_team->t.t_serialized = 0;
3883 root->r.r_uber_thread = root_thread;
3886 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3887 TCW_4(__kmp_init_gtid, TRUE);
3890 __kmp_gtid_set_specific(gtid);
3893 __kmp_itt_thread_name(gtid);
3896#ifdef KMP_TDATA_GTID
3899 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3900 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3902 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3904 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3905 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3906 KMP_INIT_BARRIER_STATE));
3909 for (b = 0; b < bs_last_barrier; ++b) {
3910 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3912 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3916 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3917 KMP_INIT_BARRIER_STATE);
3919#if KMP_AFFINITY_SUPPORTED
3920 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3921 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3922 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3923 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3925 root_thread->th.th_def_allocator = __kmp_def_allocator;
3926 root_thread->th.th_prev_level = 0;
3927 root_thread->th.th_prev_num_threads = 1;
3929 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3930 tmp->cg_root = root_thread;
3931 tmp->cg_thread_limit = __kmp_cg_max_nth;
3932 tmp->cg_nthreads = 1;
3933 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3934 " cg_nthreads init to 1\n",
3937 root_thread->th.th_cg_roots = tmp;
3939 __kmp_root_counter++;
3942 if (!initial_thread && ompt_enabled.enabled) {
3944 kmp_info_t *root_thread = ompt_get_thread();
3946 ompt_set_thread_state(root_thread, ompt_state_overhead);
3948 if (ompt_enabled.ompt_callback_thread_begin) {
3949 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3950 ompt_thread_initial, __ompt_get_thread_data_internal());
3952 ompt_data_t *task_data;
3953 ompt_data_t *parallel_data;
3954 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3956 if (ompt_enabled.ompt_callback_implicit_task) {
3957 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3958 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3961 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3965 if (ompd_state & OMPD_ENABLE_BP)
3966 ompd_bp_thread_begin();
3970 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3975#if KMP_NESTED_HOT_TEAMS
3976static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3977 const int max_level) {
3979 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3980 if (!hot_teams || !hot_teams[level].hot_team) {
3983 KMP_DEBUG_ASSERT(level < max_level);
3984 kmp_team_t *team = hot_teams[level].hot_team;
3985 nth = hot_teams[level].hot_team_nth;
3987 if (level < max_level - 1) {
3988 for (i = 0; i < nth; ++i) {
3989 kmp_info_t *th = team->t.t_threads[i];
3990 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3991 if (i > 0 && th->th.th_hot_teams) {
3992 __kmp_free(th->th.th_hot_teams);
3993 th->th.th_hot_teams = NULL;
3997 __kmp_free_team(root, team, NULL);
4004static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4005 kmp_team_t *root_team = root->r.r_root_team;
4006 kmp_team_t *hot_team = root->r.r_hot_team;
4007 int n = hot_team->t.t_nproc;
4010 KMP_DEBUG_ASSERT(!root->r.r_active);
4012 root->r.r_root_team = NULL;
4013 root->r.r_hot_team = NULL;
4016 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4017#if KMP_NESTED_HOT_TEAMS
4018 if (__kmp_hot_teams_max_level >
4020 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4021 kmp_info_t *th = hot_team->t.t_threads[i];
4022 if (__kmp_hot_teams_max_level > 1) {
4023 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4025 if (th->th.th_hot_teams) {
4026 __kmp_free(th->th.th_hot_teams);
4027 th->th.th_hot_teams = NULL;
4032 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4037 if (__kmp_tasking_mode != tskm_immediate_exec) {
4038 __kmp_wait_to_unref_task_teams();
4044 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4046 (LPVOID) & (root->r.r_uber_thread->th),
4047 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4048 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4052 if (ompd_state & OMPD_ENABLE_BP)
4053 ompd_bp_thread_end();
4057 ompt_data_t *task_data;
4058 ompt_data_t *parallel_data;
4059 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4061 if (ompt_enabled.ompt_callback_implicit_task) {
4062 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4063 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4065 if (ompt_enabled.ompt_callback_thread_end) {
4066 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4067 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4073 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4074 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4076 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4077 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4080 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4081 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4082 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4083 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4084 root->r.r_uber_thread->th.th_cg_roots = NULL;
4086 __kmp_reap_thread(root->r.r_uber_thread, 1);
4090 root->r.r_uber_thread = NULL;
4092 root->r.r_begin = FALSE;
/* Called when the primary thread of a root finishes */
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK: unregister_root_current_thread is never called
     during an abort, only during a normal close; and a holder of the forkjoin
     lock must never try to take the initialization lock. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // wait for proxy and hidden-helper tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
    // the runtime is shutting down, so no events will be reported
    thread->th.ompt_thread_info.state = ompt_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
4183static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4184 int tid,
int gtid) {
4188 KMP_DEBUG_ASSERT(this_thr != NULL);
4189 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4190 KMP_DEBUG_ASSERT(team);
4191 KMP_DEBUG_ASSERT(team->t.t_threads);
4192 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4193 kmp_info_t *master = team->t.t_threads[0];
4194 KMP_DEBUG_ASSERT(master);
4195 KMP_DEBUG_ASSERT(master->th.th_root);
4199 TCW_SYNC_PTR(this_thr->th.th_team, team);
4201 this_thr->th.th_info.ds.ds_tid = tid;
4202 this_thr->th.th_set_nproc = 0;
4203 if (__kmp_tasking_mode != tskm_immediate_exec)
4206 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4208 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4209 this_thr->th.th_set_proc_bind = proc_bind_default;
4210#if KMP_AFFINITY_SUPPORTED
4211 this_thr->th.th_new_place = this_thr->th.th_current_place;
4213 this_thr->th.th_root = master->th.th_root;
4216 this_thr->th.th_team_nproc = team->t.t_nproc;
4217 this_thr->th.th_team_master = master;
4218 this_thr->th.th_team_serialized = team->t.t_serialized;
4220 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4222 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4223 tid, gtid, this_thr, this_thr->th.th_current_task));
4225 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4228 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4229 tid, gtid, this_thr, this_thr->th.th_current_task));
4234 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4236 this_thr->th.th_local.this_construct = 0;
4238 if (!this_thr->th.th_pri_common) {
4239 this_thr->th.th_pri_common =
4240 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4241 if (__kmp_storage_map) {
4242 __kmp_print_storage_map_gtid(
4243 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4244 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4246 this_thr->th.th_pri_head = NULL;
4249 if (this_thr != master &&
4250 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4252 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4253 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4256 int i = tmp->cg_nthreads--;
4257 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4258 " on node %p of thread %p to %d\n",
4259 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4264 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4266 this_thr->th.th_cg_roots->cg_nthreads++;
4267 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4268 " node %p of thread %p to %d\n",
4269 this_thr, this_thr->th.th_cg_roots,
4270 this_thr->th.th_cg_roots->cg_root,
4271 this_thr->th.th_cg_roots->cg_nthreads));
4272 this_thr->th.th_current_task->td_icvs.thread_limit =
4273 this_thr->th.th_cg_roots->cg_thread_limit;
4278 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4281 sizeof(dispatch_private_info_t) *
4282 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4283 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4284 team->t.t_max_nproc));
4285 KMP_ASSERT(dispatch);
4286 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4287 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4289 dispatch->th_disp_index = 0;
4290 dispatch->th_doacross_buf_idx = 0;
4291 if (!dispatch->th_disp_buffer) {
4292 dispatch->th_disp_buffer =
4293 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4295 if (__kmp_storage_map) {
4296 __kmp_print_storage_map_gtid(
4297 gtid, &dispatch->th_disp_buffer[0],
4298 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4300 : __kmp_dispatch_num_buffers],
4302 "th_%d.th_dispatch.th_disp_buffer "
4303 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4304 gtid, team->t.t_id, gtid);
4307 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4310 dispatch->th_dispatch_pr_current = 0;
4311 dispatch->th_dispatch_sh_current = 0;
4313 dispatch->th_deo_fcn = 0;
4314 dispatch->th_dxo_fcn = 0;
4317 this_thr->th.th_next_pool = NULL;
4319 if (!this_thr->th.th_task_state_memo_stack) {
4321 this_thr->th.th_task_state_memo_stack =
4322 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4323 this_thr->th.th_task_state_top = 0;
4324 this_thr->th.th_task_state_stack_sz = 4;
4325 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4327 this_thr->th.th_task_state_memo_stack[i] = 0;
4330 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4331 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4341kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4343 kmp_team_t *serial_team;
4344 kmp_info_t *new_thr;
4347 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4348 KMP_DEBUG_ASSERT(root && team);
4349#if !KMP_NESTED_HOT_TEAMS
4350 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4355 if (__kmp_thread_pool) {
4356 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4357 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4358 if (new_thr == __kmp_thread_pool_insert_pt) {
4359 __kmp_thread_pool_insert_pt = NULL;
4361 TCW_4(new_thr->th.th_in_pool, FALSE);
4362 __kmp_suspend_initialize_thread(new_thr);
4363 __kmp_lock_suspend_mx(new_thr);
4364 if (new_thr->th.th_active_in_pool == TRUE) {
4365 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4366 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4367 new_thr->th.th_active_in_pool = FALSE;
4369 __kmp_unlock_suspend_mx(new_thr);
4371 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4372 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4373 KMP_ASSERT(!new_thr->th.th_team);
4374 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4377 __kmp_initialize_info(new_thr, team, new_tid,
4378 new_thr->th.th_info.ds.ds_gtid);
4379 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4381 TCW_4(__kmp_nth, __kmp_nth + 1);
4383 new_thr->th.th_task_state = 0;
4384 new_thr->th.th_task_state_top = 0;
4385 new_thr->th.th_task_state_stack_sz = 4;
4387 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4389 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4393#ifdef KMP_ADJUST_BLOCKTIME
4396 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4397 if (__kmp_nth > __kmp_avail_proc) {
4398 __kmp_zero_bt = TRUE;
4407 kmp_balign_t *balign = new_thr->th.th_bar;
4408 for (b = 0; b < bs_last_barrier; ++b)
4409 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4412 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4413 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4420 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4421 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4426 if (!TCR_4(__kmp_init_monitor)) {
4427 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4428 if (!TCR_4(__kmp_init_monitor)) {
4429 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4430 TCW_4(__kmp_init_monitor, 1);
4431 __kmp_create_monitor(&__kmp_monitor);
4432 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4443 while (TCR_4(__kmp_init_monitor) < 2) {
4446 KF_TRACE(10, (
"after monitor thread has started\n"));
4449 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4456 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4458 : __kmp_hidden_helper_threads_num + 1;
4460 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4462 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4465 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4466 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4471 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4473 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4475#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4478 __itt_suppress_mark_range(
4479 __itt_suppress_range, __itt_suppress_threading_errors,
4480 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4481 __itt_suppress_mark_range(
4482 __itt_suppress_range, __itt_suppress_threading_errors,
4483 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4485 __itt_suppress_mark_range(
4486 __itt_suppress_range, __itt_suppress_threading_errors,
4487 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4489 __itt_suppress_mark_range(__itt_suppress_range,
4490 __itt_suppress_threading_errors,
4491 &new_thr->th.th_suspend_init_count,
4492 sizeof(new_thr->th.th_suspend_init_count));
4495 __itt_suppress_mark_range(__itt_suppress_range,
4496 __itt_suppress_threading_errors,
4497 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4498 sizeof(new_thr->th.th_bar[0].bb.b_go));
4499 __itt_suppress_mark_range(__itt_suppress_range,
4500 __itt_suppress_threading_errors,
4501 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4502 sizeof(new_thr->th.th_bar[1].bb.b_go));
4503 __itt_suppress_mark_range(__itt_suppress_range,
4504 __itt_suppress_threading_errors,
4505 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4506 sizeof(new_thr->th.th_bar[2].bb.b_go));
4508 if (__kmp_storage_map) {
4509 __kmp_print_thread_storage_map(new_thr, new_gtid);
4514 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4515 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4516 new_thr->th.th_serial_team = serial_team =
4517 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4521 proc_bind_default, &r_icvs,
4522 0 USE_NESTED_HOT_ARG(NULL));
4524 KMP_ASSERT(serial_team);
4525 serial_team->t.t_serialized = 0;
4527 serial_team->t.t_threads[0] = new_thr;
4529 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4533 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4536 __kmp_initialize_fast_memory(new_thr);
4540 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4541 __kmp_initialize_bget(new_thr);
4544 __kmp_init_random(new_thr);
4548 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4549 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4552 kmp_balign_t *balign = new_thr->th.th_bar;
4553 for (b = 0; b < bs_last_barrier; ++b) {
4554 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4555 balign[b].bb.team = NULL;
4556 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4557 balign[b].bb.use_oncore_barrier = 0;
4560 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4561 new_thr->th.th_sleep_loc_type = flag_unset;
4563 new_thr->th.th_spin_here = FALSE;
4564 new_thr->th.th_next_waiting = 0;
4566 new_thr->th.th_blocking =
false;
4569#if KMP_AFFINITY_SUPPORTED
4570 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4571 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4572 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4573 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4575 new_thr->th.th_def_allocator = __kmp_def_allocator;
4576 new_thr->th.th_prev_level = 0;
4577 new_thr->th.th_prev_num_threads = 1;
4579 TCW_4(new_thr->th.th_in_pool, FALSE);
4580 new_thr->th.th_active_in_pool = FALSE;
4581 TCW_4(new_thr->th.th_active, TRUE);
4589 if (__kmp_adjust_gtid_mode) {
4590 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4591 if (TCR_4(__kmp_gtid_mode) != 2) {
4592 TCW_4(__kmp_gtid_mode, 2);
4595 if (TCR_4(__kmp_gtid_mode) != 1) {
4596 TCW_4(__kmp_gtid_mode, 1);
4601#ifdef KMP_ADJUST_BLOCKTIME
4604 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4605 if (__kmp_nth > __kmp_avail_proc) {
4606 __kmp_zero_bt = TRUE;
4613 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4614 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4616 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4618 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
/* Reinitialize a team for reuse. The hot-team code calls this at every fork
   barrier, so EPCC barrier tests are very sensitive to writes to the team
   struct, which invalidate the cache line in all threads. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure. This assumes that t_threads and
   t_max_nproc are already set. The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left alone; resetting it would break the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Set the full affinity mask for the thread; return the old mask via old_mask. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0)
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4722#if KMP_AFFINITY_SUPPORTED
4728static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4730 if (KMP_HIDDEN_HELPER_TEAM(team))
4733 kmp_info_t *master_th = team->t.t_threads[0];
4734 KMP_DEBUG_ASSERT(master_th != NULL);
4735 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4736 int first_place = master_th->th.th_first_place;
4737 int last_place = master_th->th.th_last_place;
4738 int masters_place = master_th->th.th_current_place;
4739 int num_masks = __kmp_affinity.num_masks;
4740 team->t.t_first_place = first_place;
4741 team->t.t_last_place = last_place;
4743 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4744 "bound to place %d partition = [%d,%d]\n",
4745 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4746 team->t.t_id, masters_place, first_place, last_place));
4748 switch (proc_bind) {
4750 case proc_bind_default:
4753 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;
      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
          team->t.t_display_affinity != 1) {
        team->t.t_display_affinity = 1;
      }

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
4799 th->th.th_first_place = first_place;
4800 th->th.th_last_place = last_place;
4801 th->th.th_new_place = place;
4802 if (__kmp_display_affinity && place != th->th.th_current_place &&
4803 team->t.t_display_affinity != 1) {
4804 team->t.t_display_affinity = 1;
        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
4813 int S, rem, gap, s_count;
4814 S = n_th / n_places;
4816 rem = n_th - (S * n_places);
4817 gap = rem > 0 ? n_places / rem : n_places;
4818 int place = masters_place;
4820 for (f = 0; f < n_th; f++) {
4821 kmp_info_t *th = team->t.t_threads[f];
4822 KMP_DEBUG_ASSERT(th != NULL);
4824 th->th.th_first_place = first_place;
4825 th->th.th_last_place = last_place;
4826 th->th.th_new_place = place;
4827 if (__kmp_display_affinity && place != th->th.th_current_place &&
4828 team->t.t_display_affinity != 1) {
4829 team->t.t_display_affinity = 1;
4833 if ((s_count == S) && rem && (gap_ct == gap)) {
4835 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4837 if (place == last_place) {
4838 place = first_place;
4839 }
else if (place == (num_masks - 1)) {
4847 }
else if (s_count == S) {
4848 if (place == last_place) {
4849 place = first_place;
4850 }
else if (place == (num_masks - 1)) {
        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
4865 KMP_DEBUG_ASSERT(place == masters_place);
  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
4894 kmp_info_t *th = team->t.t_threads[f];
4895 KMP_DEBUG_ASSERT(th != NULL);
4897 th->th.th_first_place = place;
4898 th->th.th_new_place = place;
4899 if (__kmp_display_affinity && place != th->th.th_current_place &&
4900 team->t.t_display_affinity != 1) {
4901 team->t.t_display_affinity = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
4925 th->th.th_last_place = place;
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
4960 if (first >= n_places) {
4961 if (masters_place) {
4964 if (first == (masters_place + 1)) {
4965 KMP_DEBUG_ASSERT(f == n_th);
4968 if (last == masters_place) {
4969 KMP_DEBUG_ASSERT(f == (n_th - 1));
4973 KMP_DEBUG_ASSERT(f == n_th);
4978 if (last >= n_places) {
4979 last = (n_places - 1);
4984 KMP_DEBUG_ASSERT(0 <= first);
4985 KMP_DEBUG_ASSERT(n_places > first);
4986 KMP_DEBUG_ASSERT(0 <= last);
4987 KMP_DEBUG_ASSERT(n_places > last);
4988 KMP_DEBUG_ASSERT(last_place >= first_place);
4989 th = team->t.t_threads[f];
4990 KMP_DEBUG_ASSERT(th);
4991 th->th.th_first_place = first;
4992 th->th.th_new_place = place;
4993 th->th.th_last_place = last;
4994 if (__kmp_display_affinity && place != th->th.th_current_place &&
4995 team->t.t_display_affinity != 1) {
4996 team->t.t_display_affinity = 1;
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
5007 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5009 int S, rem, gap, s_count;
5010 S = n_th / n_places;
5012 rem = n_th - (S * n_places);
5013 gap = rem > 0 ? n_places / rem : n_places;
5014 int place = masters_place;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
5020 kmp_info_t *th = team->t.t_threads[f];
5021 KMP_DEBUG_ASSERT(th != NULL);
5023 th->th.th_first_place = place;
5024 th->th.th_last_place = place;
5025 th->th.th_new_place = place;
5026 if (__kmp_display_affinity && place != th->th.th_current_place &&
5027 team->t.t_display_affinity != 1) {
5028 team->t.t_display_affinity = 1;
5032 if ((s_count == S) && rem && (gap_ct == gap)) {
5034 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5036 if (place == last_place) {
5037 place = first_place;
5038 }
else if (place == (num_masks - 1)) {
5046 }
else if (s_count == S) {
5047 if (place == last_place) {
5048 place = first_place;
5049 }
else if (place == (num_masks - 1)) {
        KA_TRACE(100,
                 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]),
                  team->t.t_id, f, th->th.th_new_place,
                  th->th.th_first_place, th->th.th_last_place));
5064 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5099#if KMP_NESTED_HOT_TEAMS
5100 kmp_hot_team_ptr_t *hot_teams;
5102 team = master->th.th_team;
5103 level = team->t.t_active_level;
5104 if (master->th.th_teams_microtask) {
5105 if (master->th.th_teams_size.nteams > 1 &&
5108 (microtask_t)__kmp_teams_master ||
5109 master->th.th_teams_level <
5116 if ((master->th.th_teams_size.nteams == 1 &&
5117 master->th.th_teams_level >= team->t.t_level) ||
5118 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5119 do_place_partition = 0;
5121 hot_teams = master->th.th_hot_teams;
5122 if (level < __kmp_hot_teams_max_level && hot_teams &&
5123 hot_teams[level].hot_team) {
5131 KMP_DEBUG_ASSERT(new_nproc == 1);
5135 if (use_hot_team && new_nproc > 1) {
5136 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5137#if KMP_NESTED_HOT_TEAMS
5138 team = hot_teams[level].hot_team;
5140 team = root->r.r_hot_team;
5143 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5150 if (team->t.t_nproc != new_nproc &&
5151 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5153 int old_nthr = team->t.t_nproc;
5154 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5159 if (do_place_partition == 0)
5160 team->t.t_proc_bind = proc_bind_default;
    if (team->t.t_nproc == new_nproc) {
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5168 if (team->t.t_size_changed == -1) {
5169 team->t.t_size_changed = 1;
5171 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5175 kmp_r_sched_t new_sched = new_icvs->sched;
5177 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5179 __kmp_reinitialize_team(team, new_icvs,
5180 root->r.r_uber_thread->th.th_ident);
      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5184 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5186#if KMP_AFFINITY_SUPPORTED
5187 if ((team->t.t_size_changed == 0) &&
5188 (team->t.t_proc_bind == new_proc_bind)) {
5189 if (new_proc_bind == proc_bind_spread) {
5190 if (do_place_partition) {
5192 __kmp_partition_places(team, 1);
          KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                         "proc_bind = %d, partition = [%d,%d]\n",
                         team->t.t_id, new_proc_bind, team->t.t_first_place,
                         team->t.t_last_place));
5200 if (do_place_partition) {
5201 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5202 __kmp_partition_places(team);
5206 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5214 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5217 __kmp_add_threads_to_team(team, new_nproc);
5219#if KMP_NESTED_HOT_TEAMS
5220 if (__kmp_hot_teams_mode == 0) {
5223 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5224 hot_teams[level].hot_team_nth = new_nproc;
5227 for (f = new_nproc; f < team->t.t_nproc; f++) {
5228 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5229 if (__kmp_tasking_mode != tskm_immediate_exec) {
5232 team->t.t_threads[f]->th.th_task_team = NULL;
5234 __kmp_free_thread(team->t.t_threads[f]);
5235 team->t.t_threads[f] = NULL;
5237#if KMP_NESTED_HOT_TEAMS
5242 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5243 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5244 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5246 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5247 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5249 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5254 team->t.t_nproc = new_nproc;
5256 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5257 __kmp_reinitialize_team(team, new_icvs,
5258 root->r.r_uber_thread->th.th_ident);
5261 for (f = 0; f < new_nproc; ++f) {
5262 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5270 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5273 for (f = 0; f < team->t.t_nproc; f++) {
5274 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5275 team->t.t_threads[f]->th.th_team_nproc ==
5280 if (do_place_partition) {
5281 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5282#if KMP_AFFINITY_SUPPORTED
5283 __kmp_partition_places(team);
5287#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5288 kmp_affin_mask_t *old_mask;
5289 if (KMP_AFFINITY_CAPABLE()) {
5290 KMP_CPU_ALLOC(old_mask);
      }
#endif

      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc;
5298 team->t.t_size_changed = 1;
5300#if KMP_NESTED_HOT_TEAMS
5301 int avail_threads = hot_teams[level].hot_team_nth;
5302 if (new_nproc < avail_threads)
5303 avail_threads = new_nproc;
5304 kmp_info_t **other_threads = team->t.t_threads;
5305 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5309 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5310 for (b = 0; b < bs_last_barrier; ++b) {
5311 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5312 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5314 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5318 if (hot_teams[level].hot_team_nth >= new_nproc) {
5321 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5322 team->t.t_nproc = new_nproc;
5326 team->t.t_nproc = hot_teams[level].hot_team_nth;
5327 hot_teams[level].hot_team_nth = new_nproc;
5329 if (team->t.t_max_nproc < new_nproc) {
5331 __kmp_reallocate_team_arrays(team, new_nproc);
5332 __kmp_reinitialize_team(team, new_icvs, NULL);
5335#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5341 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5345 for (f = team->t.t_nproc; f < new_nproc; f++) {
5346 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5347 KMP_DEBUG_ASSERT(new_worker);
5348 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5359 kmp_balign_t *balign = new_worker->th.th_bar;
5360 for (b = 0; b < bs_last_barrier; ++b) {
5361 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5362 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5363 KMP_BARRIER_PARENT_FLAG);
5365 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5371#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5372 if (KMP_AFFINITY_CAPABLE()) {
5374 __kmp_set_system_affinity(old_mask, TRUE);
5375 KMP_CPU_FREE(old_mask);
5378#if KMP_NESTED_HOT_TEAMS
5381 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5384 __kmp_add_threads_to_team(team, new_nproc);
5388 __kmp_initialize_team(team, new_nproc, new_icvs,
5389 root->r.r_uber_thread->th.th_ident);
5392 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5393 for (f = 0; f < team->t.t_nproc; ++f)
5394 __kmp_initialize_info(team->t.t_threads[f], team, f,
5395 __kmp_gtid_from_tid(f, team));
5403 for (f = old_nproc; f < team->t.t_nproc; ++f)
5404 team->t.t_threads[f]->th.th_task_state =
5405 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5408 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5409 for (f = old_nproc; f < team->t.t_nproc; ++f)
5410 team->t.t_threads[f]->th.th_task_state = old_state;
5414 for (f = 0; f < team->t.t_nproc; ++f) {
5415 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5416 team->t.t_threads[f]->th.th_team_nproc ==
5421 if (do_place_partition) {
5422 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5423#if KMP_AFFINITY_SUPPORTED
5424 __kmp_partition_places(team);
5429 kmp_info_t *master = team->t.t_threads[0];
5430 if (master->th.th_teams_microtask) {
5431 for (f = 1; f < new_nproc; ++f) {
5433 kmp_info_t *thr = team->t.t_threads[f];
5434 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5435 thr->th.th_teams_level = master->th.th_teams_level;
5436 thr->th.th_teams_size = master->th.th_teams_size;
5439#if KMP_NESTED_HOT_TEAMS
5443 for (f = 1; f < new_nproc; ++f) {
5444 kmp_info_t *thr = team->t.t_threads[f];
5446 kmp_balign_t *balign = thr->th.th_bar;
5447 for (b = 0; b < bs_last_barrier; ++b) {
5448 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5449 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5451 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5459 __kmp_alloc_argv_entries(argc, team, TRUE);
5460 KMP_CHECK_UPDATE(team->t.t_argc, argc);
    KF_TRACE(10, (" hot_team = %p\n", team));
5467 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5475 __ompt_team_assign_id(team, ompt_parallel_data);
5485 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5488 if (team->t.t_max_nproc >= max_nproc) {
5490 __kmp_team_pool = team->t.t_next_pool;
5492 if (max_nproc > 1 &&
5493 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5495 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5500 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
5505 team->t.t_task_team[0] = NULL;
5506 team->t.t_task_team[1] = NULL;
5509 __kmp_alloc_argv_entries(argc, team, TRUE);
5510 KMP_CHECK_UPDATE(team->t.t_argc, argc);
      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5517 for (b = 0; b < bs_last_barrier; ++b) {
5518 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5520 team->t.t_bar[b].b_master_arrived = 0;
5521 team->t.t_bar[b].b_team_arrived = 0;
5526 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);
5544 team = __kmp_reap_team(team);
5545 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5553 team->t.t_max_nproc = max_nproc;
5554 if (max_nproc > 1 &&
5555 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5557 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5562 __kmp_allocate_team_arrays(team, max_nproc);
  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5565 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5570 team->t.t_task_team[0] = NULL;
5572 team->t.t_task_team[1] = NULL;
  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }
5580 __kmp_alloc_argv_entries(argc, team, FALSE);
5581 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5588 for (b = 0; b < bs_last_barrier; ++b) {
5589 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5591 team->t.t_bar[b].b_master_arrived = 0;
5592 team->t.t_bar[b].b_team_arrived = 0;
5597 team->t.t_proc_bind = new_proc_bind;
5600 __ompt_team_assign_id(team, ompt_parallel_data);
5601 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5624 KMP_DEBUG_ASSERT(root);
5625 KMP_DEBUG_ASSERT(team);
5626 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5627 KMP_DEBUG_ASSERT(team->t.t_threads);
5629 int use_hot_team = team == root->r.r_hot_team;
5630#if KMP_NESTED_HOT_TEAMS
5633 level = team->t.t_active_level - 1;
5634 if (master->th.th_teams_microtask) {
5635 if (master->th.th_teams_size.nteams > 1) {
5639 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5640 master->th.th_teams_level == team->t.t_level) {
5646 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5648 if (level < __kmp_hot_teams_max_level) {
5649 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
  team->t.t_copyin_counter = 0;
5664 if (!use_hot_team) {
5665 if (__kmp_tasking_mode != tskm_immediate_exec) {
5667 for (f = 1; f < team->t.t_nproc; ++f) {
5668 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5669 kmp_info_t *th = team->t.t_threads[f];
5670 volatile kmp_uint32 *state = &th->th.th_reap_state;
5671 while (*state != KMP_SAFE_TO_REAP) {
5675 if (!__kmp_is_thread_alive(th, &ecode)) {
5676 *state = KMP_SAFE_TO_REAP;
5681 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5682 if (fl.is_sleeping())
5683 fl.resume(__kmp_gtid_from_thread(th));
5690 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5691 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5692 if (task_team != NULL) {
5693 for (f = 0; f < team->t.t_nproc; ++f) {
5694 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5695 team->t.t_threads[f]->th.th_task_team = NULL;
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
5701#if KMP_NESTED_HOT_TEAMS
5702 __kmp_free_task_team(master, task_team);
5704 team->t.t_task_team[tt_idx] = NULL;
5710 team->t.t_parent = NULL;
5711 team->t.t_level = 0;
5712 team->t.t_active_level = 0;
5715 for (f = 1; f < team->t.t_nproc; ++f) {
5716 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5717 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5718 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5721 __kmp_free_thread(team->t.t_threads[f]);
5724 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5727 team->t.b->go_release();
5728 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5729 for (f = 1; f < team->t.t_nproc; ++f) {
5730 if (team->t.b->sleep[f].sleep) {
5731 __kmp_atomic_resume_64(
5732 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5733 (kmp_atomic_flag_64<> *)NULL);
      for (int f = 1; f < team->t.t_nproc; ++f) {
        while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5745 for (f = 1; f < team->t.t_nproc; ++f) {
5746 team->t.t_threads[f] = NULL;
5749 if (team->t.t_max_nproc > 1 &&
5750 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5751 distributedBarrier::deallocate(team->t.b);
5756 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
5760 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5761 team->t.t_threads[1]->th.th_cg_roots);
5762 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5764 for (f = 1; f < team->t.t_nproc; ++f) {
5765 kmp_info_t *thr = team->t.t_threads[f];
5766 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5767 thr->th.th_cg_roots->cg_root == thr);
5769 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5770 thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5774 int i = tmp->cg_nthreads--;
5779 if (thr->th.th_cg_roots)
5780 thr->th.th_current_task->td_icvs.thread_limit =
5781 thr->th.th_cg_roots->cg_thread_limit;
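// Reap a team taken off the team pool: free its arrays and any heap-allocated
// argv block, and return the next team in the pool chain.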
5790kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5791 kmp_team_t *next_pool = team->t.t_next_pool;
5793 KMP_DEBUG_ASSERT(team);
5794 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5795 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5796 KMP_DEBUG_ASSERT(team->t.t_threads);
5797 KMP_DEBUG_ASSERT(team->t.t_argv);
5802 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
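// Return a thread to the pool of available threads without reaping it. The
// pool is kept sorted by gtid (using __kmp_thread_pool_insert_pt as a hint)
// so that thread-to-gtid reuse stays predictable.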
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
5850 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5851 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5852 balign[b].bb.team = NULL;
5853 balign[b].bb.leaf_kids = 0;
5855 this_th->th.th_task_state = 0;
5856 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5859 TCW_PTR(this_th->th.th_team, NULL);
5860 TCW_PTR(this_th->th.th_root, NULL);
5861 TCW_PTR(this_th->th.th_dispatch, NULL);
5863 while (this_th->th.th_cg_roots) {
5864 this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
5870 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5871 if (tmp->cg_root == this_th) {
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5875 this_th->th.th_cg_roots = tmp->up;
5878 if (tmp->cg_nthreads == 0) {
5881 this_th->th.th_cg_roots = NULL;
5891 __kmp_free_implicit_task(this_th);
5892 this_th->th.th_current_task = NULL;
5896 gtid = this_th->th.th_info.ds.ds_gtid;
5897 if (__kmp_thread_pool_insert_pt != NULL) {
5898 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5899 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5900 __kmp_thread_pool_insert_pt = NULL;
5909 if (__kmp_thread_pool_insert_pt != NULL) {
5910 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5912 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5914 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5915 scan = &((*scan)->th.th_next_pool))
5920 TCW_PTR(this_th->th.th_next_pool, *scan);
5921 __kmp_thread_pool_insert_pt = *scan = this_th;
5922 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5923 (this_th->th.th_info.ds.ds_gtid <
5924 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5925 TCW_4(this_th->th.th_in_pool, TRUE);
5926 __kmp_suspend_initialize_thread(this_th);
5927 __kmp_lock_suspend_mx(this_th);
5928 if (this_th->th.th_active == TRUE) {
5929 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5930 this_th->th.th_active_in_pool = TRUE;
5934 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5937 __kmp_unlock_suspend_mx(this_th);
5939 TCW_4(__kmp_nth, __kmp_nth - 1);
#ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
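// Main loop of an OpenMP worker thread: wait at the fork barrier for work,
// invoke the team's microtask, then wait at the join barrier, repeating until
// the runtime signals global shutdown (__kmp_global.g.g_done).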
5957void *__kmp_launch_thread(kmp_info_t *this_thr) {
5958#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5972 if (__kmp_env_consistency_check) {
5973 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5977 if (ompd_state & OMPD_ENABLE_BP)
5978 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
5983 if (ompt_enabled.enabled) {
5984 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5985 *thread_data = ompt_data_none;
5987 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5988 this_thr->th.ompt_thread_info.wait_id = 0;
5989 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5990 this_thr->th.ompt_thread_info.parallel_flags = 0;
5991 if (ompt_enabled.ompt_callback_thread_begin) {
5992 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5993 ompt_thread_worker, thread_data);
5995 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6000 while (!TCR_4(__kmp_global.g.g_done)) {
6001 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6008 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6011 if (ompt_enabled.enabled) {
6012 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6016 pteam = &this_thr->th.th_team;
6019 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6021 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6028 updateHWFPControl(*pteam);
6031 if (ompt_enabled.enabled) {
6032 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6036 rc = (*pteam)->t.t_invoke(gtid);
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
6045 if (ompt_enabled.enabled) {
6047 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6049 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6053 __kmp_join_barrier(gtid);
6056 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6059 if (ompd_state & OMPD_ENABLE_BP)
6060 ompd_bp_thread_end();
6064 if (ompt_enabled.ompt_callback_thread_end) {
6065 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6069 this_thr->th.th_task_team = NULL;
6071 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
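// Destructor registered for the thread-specific gtid key: recover the gtid
// encoded in the key value (stored biased by +1) and run the per-thread
// shutdown path.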
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6093 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
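// Reap a single worker thread: release it from the fork barrier, join the
// underlying OS thread, and free all per-thread resources (implicit task,
// fast memory, consistency-check stack, affinity mask, serial team, ...).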
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;
6147 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6154 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6156 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
6167 __kmp_reap_worker(thread);
6179 if (thread->th.th_active_in_pool) {
6180 thread->th.th_active_in_pool = FALSE;
6181 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6182 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6186 __kmp_free_implicit_task(thread);
6190 __kmp_free_fast_memory(thread);
6193 __kmp_suspend_uninitialize_thread(thread);
6195 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6196 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6201#ifdef KMP_ADJUST_BLOCKTIME
6204 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6205 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6206 if (__kmp_nth <= __kmp_avail_proc) {
6207 __kmp_zero_bt = FALSE;
6213 if (__kmp_env_consistency_check) {
6214 if (thread->th.th_cons) {
6215 __kmp_free_cons_stack(thread->th.th_cons);
6216 thread->th.th_cons = NULL;
6220 if (thread->th.th_pri_common != NULL) {
6221 __kmp_free(thread->th.th_pri_common);
6222 thread->th.th_pri_common = NULL;
6225 if (thread->th.th_task_state_memo_stack != NULL) {
6226 __kmp_free(thread->th.th_task_state_memo_stack);
6227 thread->th.th_task_state_memo_stack = NULL;
6231 if (thread->th.th_local.bget_data != NULL) {
6232 __kmp_finalize_bget(thread);
6236#if KMP_AFFINITY_SUPPORTED
6237 if (thread->th.th_affin_mask != NULL) {
6238 KMP_CPU_FREE(thread->th.th_affin_mask);
6239 thread->th.th_affin_mask = NULL;
6243#if KMP_USE_HIER_SCHED
6244 if (thread->th.th_hier_bar_data != NULL) {
6245 __kmp_free(thread->th.th_hier_bar_data);
6246 thread->th.th_hier_bar_data = NULL;
6250 __kmp_reap_team(thread->th.th_serial_team);
6251 thread->th.th_serial_team = NULL;
6258static void __kmp_itthash_clean(kmp_info_t *th) {
6260 if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6262 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6264 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6265 __kmp_thread_free(th, bucket);
6270 if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6272 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6274 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6275 __kmp_thread_free(th, bucket);
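// Common shutdown path: unregister the library and, if no root is still
// active, reap the monitor, all pooled threads and teams, and the task teams.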
static void __kmp_internal_end(void) {
6287 __kmp_unregister_library();
6294 __kmp_reclaim_dead_roots();
6298 for (i = 0; i < __kmp_threads_capacity; i++)
6300 if (__kmp_root[i]->r.r_active)
6303 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6305 if (i < __kmp_threads_capacity) {
6317 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6318 if (TCR_4(__kmp_init_monitor)) {
6319 __kmp_reap_monitor(&__kmp_monitor);
6320 TCW_4(__kmp_init_monitor, 0);
6322 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6329 for (i = 0; i < __kmp_threads_capacity; i++) {
6330 if (__kmp_root[i]) {
6333 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6342 while (__kmp_thread_pool != NULL) {
6344 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6345 __kmp_thread_pool = thread->th.th_next_pool;
6347 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6348 thread->th.th_next_pool = NULL;
6349 thread->th.th_in_pool = FALSE;
6350 __kmp_reap_thread(thread, 0);
6352 __kmp_thread_pool_insert_pt = NULL;
6355 while (__kmp_team_pool != NULL) {
6357 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6358 __kmp_team_pool = team->t.t_next_pool;
6360 team->t.t_next_pool = NULL;
6361 __kmp_reap_team(team);
6364 __kmp_reap_task_teams();
6371 for (i = 0; i < __kmp_threads_capacity; i++) {
6372 kmp_info_t *thr = __kmp_threads[i];
6373 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6378 for (i = 0; i < __kmp_threads_capacity; ++i) {
6385 TCW_SYNC_4(__kmp_init_common, FALSE);
  KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6395 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6396 if (TCR_4(__kmp_init_monitor)) {
6397 __kmp_reap_monitor(&__kmp_monitor);
6398 TCW_4(__kmp_init_monitor, 0);
6400 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB();
}
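// Library-level shutdown entry point (atexit/destructor path). Decides, based
// on the calling gtid, whether to unregister just this root or to run the
// full __kmp_internal_end() teardown under the initialization and fork/join
// bootstrap locks.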
void __kmp_internal_end_library(int gtid_req) {
6420 if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }
6431 if (TCR_4(__kmp_init_hidden_helper) &&
6432 !TCR_4(__kmp_hidden_helper_team_done)) {
6433 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6435 __kmp_hidden_helper_main_thread_release();
6437 __kmp_hidden_helper_threads_deinitz_wait();
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
    } else if (KMP_UBER_GTID(gtid)) {
6460 if (__kmp_root[gtid]->r.r_active) {
6461 __kmp_global.g.g_abort = -1;
6462 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6463 __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6473 __kmp_unregister_root_current_thread(gtid);
6480#ifdef DUMP_DEBUG_ON_EXIT
6481 if (__kmp_debug_buf)
6482 __kmp_dump_debug_buffer();
6487 __kmp_unregister_library();
6492 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6495 if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6498 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6501 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6502 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6511 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6514 __kmp_internal_end();
6516 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6517 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6521#ifdef DUMP_DEBUG_ON_EXIT
6522 if (__kmp_debug_buf)
6523 __kmp_dump_debug_buffer();
6527 __kmp_close_console();
6530 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6543 if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }
6554 if (TCR_4(__kmp_init_hidden_helper) &&
6555 !TCR_4(__kmp_hidden_helper_team_done)) {
6556 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6558 __kmp_hidden_helper_main_thread_release();
6560 __kmp_hidden_helper_threads_deinitz_wait();
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
    } else if (KMP_UBER_GTID(gtid)) {
6585 if (__kmp_root[gtid]->r.r_active) {
6586 __kmp_global.g.g_abort = -1;
6587 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6602 __kmp_threads[gtid]->th.th_task_team = NULL;
      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
6612 if (__kmp_pause_status != kmp_hard_paused)
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6621 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6624 if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6627 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6630 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6631 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6642 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6644 for (i = 0; i < __kmp_threads_capacity; ++i) {
6645 if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6649 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6650 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6657 __kmp_internal_end();
6659 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6660 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6664#ifdef DUMP_DEBUG_ON_EXIT
6665 if (__kmp_debug_buf)
6666 __kmp_dump_debug_buffer();
6673static long __kmp_registration_flag = 0;
6675static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
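// Library registration: publish a "<flag address>-<flag value>-<library>"
// string under a per-process name (an environment variable, or a POSIX shared
// memory segment when KMP_USE_SHM is defined) so that a second OpenMP runtime
// loaded into the same process can detect the duplicate and honor
// KMP_DUPLICATE_LIB_OK. Roughly, the published value looks like
// "0x7f...-cafe1234-libomp.so" (address, flag, library file name).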
void __kmp_register_library_startup(void) {
6694 char *name = __kmp_reg_status_name();
6700#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6701 __kmp_initialize_system_tick();
6703 __kmp_read_system_time(&time.dtime);
6704 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6716#if defined(KMP_USE_SHM)
  char *shm_name = __kmp_str_format("/%s", name);
6718 int shm_preexist = 0;
6720 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6721 if ((fd1 == -1) && (errno == EEXIST)) {
6724 fd1 = shm_open(shm_name, O_RDWR, 0666);
      if (fd1 == -1) {
        __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
                    __kmp_msg_null);
      } else {
        shm_preexist = 1;
      }
    } else if (fd1 == -1) {
      __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
                  __kmp_msg_null);
    }
    if (shm_preexist == 0) {
      if (ftruncate(fd1, SHM_SIZE) == -1) {
        __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
                    KMP_ERR(errno), __kmp_msg_null);
      }
    }
    char *data1 =
        (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6749 if (data1 == MAP_FAILED) {
      __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
                  __kmp_msg_null);
6754 if (shm_preexist == 0) {
6755 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
    value = __kmp_str_format("%s", data1);
6759 munmap(data1, SHM_SIZE);
6763 __kmp_env_set(name, __kmp_registration_str, 0);
6765 value = __kmp_env_get(name);
6768 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6775 char *flag_addr_str = NULL;
6776 char *flag_val_str = NULL;
6777 char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
6782 unsigned long *flag_addr = 0;
6783 unsigned long flag_val = 0;
      KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
      KMP_SSCANF(flag_val_str, "%lx", &flag_val);
      if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6790 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
      file_name = "unknown library";
      char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6810 if (!__kmp_str_match_true(duplicate_ok)) {
6812 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6813 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6815 KMP_INTERNAL_FREE(duplicate_ok);
6816 __kmp_duplicate_library_ok = 1;
6821#if defined(KMP_USE_SHM)
6823 shm_unlink(shm_name);
6826 __kmp_env_unset(name);
6830 KMP_DEBUG_ASSERT(0);
  KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE((void *)shm_name);
#endif
  KMP_INTERNAL_FREE((void *)name);
void __kmp_unregister_library(void) {
6845 char *name = __kmp_reg_status_name();
6848#if defined(KMP_USE_SHM)
  char *shm_name = __kmp_str_format("/%s", name);
6850 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
    char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
    if (data1 != MAP_FAILED) {
      value = __kmp_str_format("%s", data1);
6858 munmap(data1, SHM_SIZE);
6862 value = __kmp_env_get(name);
6865 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6866 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6867 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6869#if defined(KMP_USE_SHM)
6870 shm_unlink(shm_name);
6872 __kmp_env_unset(name);
6876#if defined(KMP_USE_SHM)
6877 KMP_INTERNAL_FREE(shm_name);
6880 KMP_INTERNAL_FREE(__kmp_registration_str);
6881 KMP_INTERNAL_FREE(value);
6882 KMP_INTERNAL_FREE(name);
6884 __kmp_registration_flag = 0;
6885 __kmp_registration_str = NULL;
6892#if KMP_MIC_SUPPORTED
6894static void __kmp_check_mic_type() {
6895 kmp_cpuid_t cpuid_state = {0};
6896 kmp_cpuid_t *cs_p = &cpuid_state;
6897 __kmp_x86_cpuid(1, 0, cs_p);
6899 if ((cs_p->eax & 0xff0) == 0xB10) {
6900 __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6902 __kmp_mic_type = mic3;
6904 __kmp_mic_type = non_mic;
6911static void __kmp_user_level_mwait_init() {
6912 struct kmp_cpuid buf;
6913 __kmp_x86_cpuid(7, 0, &buf);
6914 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6915 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6916 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
6921#ifndef AT_INTELPHIUSERMWAIT
6924#define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
6937 if (__kmp_mic_type == mic3) {
6938 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6939 if ((res & 0x1) || __kmp_user_level_mwait) {
6940 __kmp_mwait_enabled = TRUE;
6941 if (__kmp_user_level_mwait) {
6942 KMP_INFORM(EnvMwaitWarn);
6945 __kmp_mwait_enabled = FALSE;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT */
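// One-time serial initialization: sanity-check primitive type sizes, create
// the global/bootstrap/atomic locks, size the thread and team arrays, read
// the environment, register the initial (uber) root thread, and install the
// atexit handler and signal handlers.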
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6974 __kmp_validate_locks();
6977 __kmp_init_allocator();
6983 if (__kmp_need_register_serial)
6984 __kmp_register_library_startup();
6987 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6991 __kmp_global.g.g_abort = 0;
6992 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6995#if KMP_USE_ADAPTIVE_LOCKS
6996#if KMP_DEBUG_ADAPTIVE_LOCKS
6997 __kmp_init_speculative_stats();
7000#if KMP_STATS_ENABLED
7003 __kmp_init_lock(&__kmp_global_lock);
7004 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7005 __kmp_init_lock(&__kmp_debug_lock);
7006 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7007 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7008 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7009 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7010 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7011 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7012 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7013 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7014 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7015 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7016 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7017 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7018 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7019 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7020 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7022 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7024 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7028 __kmp_runtime_initialize();
7030#if KMP_MIC_SUPPORTED
7031 __kmp_check_mic_type();
7038 __kmp_abort_delay = 0;
7042 __kmp_dflt_team_nth_ub = __kmp_xproc;
7043 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7044 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7046 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7047 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7049 __kmp_max_nth = __kmp_sys_max_nth;
7050 __kmp_cg_max_nth = __kmp_sys_max_nth;
7051 __kmp_teams_max_nth = __kmp_xproc;
7052 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7053 __kmp_teams_max_nth = __kmp_sys_max_nth;
7058 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7060 __kmp_monitor_wakeups =
7061 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7062 __kmp_bt_intervals =
7063 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7066 __kmp_library = library_throughput;
7068 __kmp_static = kmp_sch_static_balanced;
7075#if KMP_FAST_REDUCTION_BARRIER
7076#define kmp_reduction_barrier_gather_bb ((int)1)
7077#define kmp_reduction_barrier_release_bb ((int)1)
7078#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7079#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7081 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7082 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7083 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7084 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7085 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7086#if KMP_FAST_REDUCTION_BARRIER
7087 if (i == bs_reduction_barrier) {
7089 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7090 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7091 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7092 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7096#if KMP_FAST_REDUCTION_BARRIER
7097#undef kmp_reduction_barrier_release_pat
7098#undef kmp_reduction_barrier_gather_pat
7099#undef kmp_reduction_barrier_release_bb
7100#undef kmp_reduction_barrier_gather_bb
7102#if KMP_MIC_SUPPORTED
7103 if (__kmp_mic_type == mic2) {
7105 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7106 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7108 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7109 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7111#if KMP_FAST_REDUCTION_BARRIER
7112 if (__kmp_mic_type == mic2) {
7113 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7114 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7121 __kmp_env_checks = TRUE;
7123 __kmp_env_checks = FALSE;
7127 __kmp_foreign_tp = TRUE;
7129 __kmp_global.g.g_dynamic = FALSE;
7130 __kmp_global.g.g_dynamic_mode = dynamic_default;
7132 __kmp_init_nesting_mode();
7134 __kmp_env_initialize(NULL);
7136#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7137 __kmp_user_level_mwait_init();
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
7149 __kmp_env_free(&val);
7152 __kmp_threads_capacity =
7153 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7155 __kmp_tp_capacity = __kmp_default_tp_capacity(
7156 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7161 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7162 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7163 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7164 __kmp_thread_pool = NULL;
7165 __kmp_thread_pool_insert_pt = NULL;
7166 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7180 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7182 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7187 gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7189 KMP_ASSERT(KMP_UBER_GTID(gtid));
7190 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7194 __kmp_common_initialize();
7198 __kmp_register_atfork();
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }

#if KMP_HANDLE_SIGNALS
7219 __kmp_install_signals(FALSE);
7222 __kmp_install_signals(TRUE);
7227 __kmp_init_counter++;
7229 __kmp_init_serial = TRUE;
7231 if (__kmp_settings) {
7235 if (__kmp_display_env || __kmp_display_env_verbose) {
7236 __kmp_env_print_2();
  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7249 if (__kmp_init_serial) {
7252 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7253 if (__kmp_init_serial) {
7254 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7257 __kmp_do_serial_initialize();
7258 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
static void __kmp_do_middle_initialize(void) {
7263 int prev_dflt_team_nth;
7265 if (!__kmp_init_serial) {
7266 __kmp_do_serial_initialize();
  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7271 if (UNLIKELY(!__kmp_need_register_serial)) {
7274 __kmp_register_library_startup();
7279 prev_dflt_team_nth = __kmp_dflt_team_nth;
7281#if KMP_AFFINITY_SUPPORTED
7284 __kmp_affinity_initialize(__kmp_affinity);
7288 KMP_ASSERT(__kmp_xproc > 0);
7289 if (__kmp_avail_proc == 0) {
7290 __kmp_avail_proc = __kmp_xproc;
7296 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7297 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7302 if (__kmp_dflt_team_nth == 0) {
7303#ifdef KMP_DFLT_NTH_CORES
7305 __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
7311 __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
7318 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7319 __kmp_dflt_team_nth = KMP_MIN_NTH;
7321 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7322 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7325 if (__kmp_nesting_mode > 0)
7326 __kmp_set_nesting_mode_threads();
7330 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7332 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7337 for (i = 0; i < __kmp_threads_capacity; i++) {
7338 kmp_info_t *thread = __kmp_threads[i];
7341 if (thread->th.th_current_task->td_icvs.nproc != 0)
7344 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));
7352#ifdef KMP_ADJUST_BLOCKTIME
7354 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7355 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7356 if (__kmp_nth > __kmp_avail_proc) {
7357 __kmp_zero_bt = TRUE;
7363 TCW_SYNC_4(__kmp_init_middle, TRUE);
  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}

void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
7372 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7373 if (__kmp_init_middle) {
7374 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7377 __kmp_do_middle_initialize();
7378 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
7382 int gtid = __kmp_entry_gtid();
7385 if (TCR_4(__kmp_init_parallel))
7387 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7388 if (TCR_4(__kmp_init_parallel)) {
7389 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7394 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7398 __kmp_infinite_loop();
7404 if (!__kmp_init_middle) {
7405 __kmp_do_middle_initialize();
7407 __kmp_assign_root_init_mask();
7408 __kmp_resume_if_hard_paused();
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7412 KMP_ASSERT(KMP_UBER_GTID(gtid));
7414#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7417 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7418 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7419 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7423#if KMP_HANDLE_SIGNALS
7425 __kmp_install_signals(TRUE);
7429 __kmp_suspend_initialize();
7431#if defined(USE_LOAD_BALANCE)
7432 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7433 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7436 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7437 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7441 if (__kmp_version) {
7442 __kmp_print_version_2();
7446 TCW_SYNC_4(__kmp_init_parallel, TRUE);
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7451 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7454void __kmp_hidden_helper_initialize() {
7455 if (TCR_4(__kmp_init_hidden_helper))
7459 if (!TCR_4(__kmp_init_parallel))
7460 __kmp_parallel_initialize();
7464 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7465 if (TCR_4(__kmp_init_hidden_helper)) {
7466 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7470#if KMP_AFFINITY_SUPPORTED
7474 if (!__kmp_hh_affinity.flags.initialized)
7475 __kmp_affinity_initialize(__kmp_hh_affinity);
7479 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7483 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7486 __kmp_do_initialize_hidden_helper_threads();
7489 __kmp_hidden_helper_threads_initz_wait();
7492 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
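// Per-thread bookkeeping done immediately before and after invoking the
// team's microtask: reset the dispatch buffers and, when consistency checks
// are enabled, push/pop the parallel region on the consistency stack.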
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;
7506 this_thr->th.th_local.this_construct = 0;
7508 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7510 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7511 KMP_DEBUG_ASSERT(dispatch);
7512 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7516 dispatch->th_disp_index = 0;
7517 dispatch->th_doacross_buf_idx = 0;
7518 if (__kmp_env_consistency_check)
7519 __kmp_push_parallel(gtid, team->t.t_ident);
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // Make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // Init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // Join barrier is eliminated here because worker threads are still waiting
  // in the fork barrier for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* Set the requested number of threads for the next parallel region
   encountered by this thread. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
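/* Illustrative sketch (not part of the runtime source): a compiler typically
   lowers
       #pragma omp parallel num_threads(4)
   into something like
       __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
       __kmpc_fork_call(&loc, 0, microtask);
   where the __kmpc_push_num_threads entry point (kmp_csupport.cpp) forwards to
   __kmp_push_num_threads above, so th_set_nproc only affects the very next
   parallel region encountered by this thread. */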
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // adjust num_threads without warning as it is not a user setting
    // num_threads = min(num_threads, nthreads-var, thread-limit-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    } // prevent team size from exceeding thread-limit-var
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary of the league primary threads.
    // Store the new thread limit; the old limit is saved in th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the requested number of teams for the teams region and/or the number of
   threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // OpenMP specification requires a warning for negative values
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
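/* Illustrative sketch (not part of the runtime source): for
       #pragma omp teams num_teams(8) thread_limit(4)
   the compiler typically emits something like
       __kmpc_push_num_teams(&loc, gtid, 8, 4);
       __kmpc_fork_teams(&loc, 0, teams_microtask);
   __kmpc_push_num_teams forwards to __kmp_push_num_teams above, which clamps
   the request against __kmp_teams_max_nth before __kmp_push_thread_limit
   records the per-team thread limit. */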
/* OpenMP 5.1 variant: num_teams clause may specify a lower and an upper bound
   on the number of teams. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i, retval;
  kmp_team_t *hot_team;

  if (root->r.r_active)
    return 0;
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
    return hot_team->t.t_nproc - 1; // Don't count primary thread
  // Skip the primary thread; it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active)
      retval++;
  }
  return retval;
}
// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                root, set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root, and the currently executing thread (which will become the
  // primary) are available to add to the new team, but also currently
  // contribute to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs.  The real system load at this instant should be at least as
  // large as the #active OpenMP threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}
#endif /* USE_LOAD_BALANCE */
/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block,
  // so there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API */
// Returns NULL if the innermost active team is not a teams construct team.
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (1 team of 1 thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
// Structure holding the short name, long name, and corresponding data type
// for snprintf.  A table of these represents the entire valid keyword set for
// the affinity format string.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g. L, N, A
  const char *long_name; // from spec, e.g. thread_affinity, host
  char field_format; // data type for snprintf (typically 'd' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Return the number of characters needed to hold the formatted field (does
// not include the null byte).  On entry, *ptr points at the '%' character.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit field widths; this also prevents overflowing the
    // format buffer.
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value using snprintf within
  // __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the implementation has no info for the field type, print "undefined"
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the formatted string (does
// not include the null byte).
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, then use the
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the affinity format string for the current thread.
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
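/* Illustrative sketch (not part of the runtime source): with
       OMP_DISPLAY_AFFINITY=TRUE
       OMP_AFFINITY_FORMAT="host=%H pid=%P tid=%n affinity={%A}"
   each thread would print one line such as
       host=node01 pid=12345 tid=3 affinity={0-3}
   (values are made up).  The same machinery backs omp_display_affinity() and
   omp_capture_affinity(): the format string is expanded field by field through
   __kmp_aux_capture_affinity() above, falling back to the affinity-format-var
   ICV when the format argument is NULL or empty. */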
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
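/* Illustrative sketch (not part of the runtime source): this is the routine
   behind the KMP_BLOCKTIME environment variable and the kmp_set_blocktime()
   extension, e.g.
       kmp_set_blocktime(0);   // workers go to sleep immediately at barriers
       kmp_set_blocktime(200); // spin for roughly 200 ms before sleeping
   The value is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and stored in
   the internal control structures of both the current and the serial team. */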
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If reduce_data != NULL && reduce_func != NULL, the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by the RTL.
  // Finally, it is up to the OpenMP RTL to pick among the methods generated by
  // the compiler.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// This function is for testing the set/get/determine reduce method.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
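/* Illustrative sketch (not part of the runtime source): the packed reduction
   method keeps the method identifier in the upper bits and, when fast
   reduction barriers are used, the barrier type in the low byte, roughly
       critical_reduce_block = (1 << 8)
       atomic_reduce_block   = (2 << 8)
       tree_reduce_block     = (3 << 8)
       empty_reduce_block    = (4 << 8)
   which is why __kmp_get_reduce_method() above shifts the stored value right
   by 8 to recover the method for testing purposes. */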
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely.  Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it up if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource.  Returns 0 on success.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // runtime is not paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // already paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // already paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // invalid level
    return 1;
  }
}
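/* Illustrative sketch (not part of the runtime source): this routine backs the
   OpenMP 5.0 pause API reached through __kmpc_pause_resource, e.g.
       omp_pause_resource_all(omp_pause_soft); // workers sleep, state is kept
       ...
       #pragma omp parallel                    // implicitly resumes the runtime
       omp_pause_resource_all(omp_pause_hard); // runtime fully shut down
   A soft pause keeps the thread pool but puts workers to sleep; a hard pause
   tears the runtime down via __kmp_internal_end_thread(-1), and the runtime is
   re-initialized on the next OpenMP construct. */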
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If the thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see the transition status 2 and move to 0; but they may
  // need to be woken up first.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add threads back to the team after a resize.
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Set th_used_in_team to 3 to indicate to the worker that it should
  // transition itself back into the team.  Then, if blocktime isn't infinite,
  // the worker could be sleeping, so send a resume to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }

  // The threads should set th_used_in_team to 1 when they finish transitioning
  // into the team; wait until all of them have done so.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads in case
  // a regular thread pushes a hidden helper task to a helper thread before
  // that thread has finished initializing.  Wait here until all hidden helper
  // threads are ready before executing any hidden helper tasks.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for signal
  if (__kmpc_master(nullptr, *gtid)) {
    // Set the state of the main thread to ready so that other threads can
    // wait on their own signals
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    // Wake up all worker hidden helper threads
    __kmp_hidden_helper_initz_release();
    // Wait for the signal to proceed
    __kmp_hidden_helper_main_thread_wait();
    // Signal all the worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   KMP_NESTING_MODE=0 is the default and does not use nesting mode.
   KMP_NESTING_MODE=1 enables nesting mode up to the depth of the machine
   topology.
   KMP_NESTING_MODE=N, N>1, attempts to create nesting up to depth N.
   Duplicate topology levels and levels with only one entity are skipped. */

// Allocate space to store nesting levels
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting; must be called
// after the topology has been set.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels with only one entity
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't count core level
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guess
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}