LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in future, e.g. to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
80
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
211
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe to save thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295#endif
296
297#if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299#endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304#if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306#endif
307 __kmp_join_call(loc, gtid
308#if OMPT_SUPPORT
309 ,
310 fork_context_intel
311#endif
312 );
313
314 va_end(ap);
315
316#if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320#endif
321 }
322
323#if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330#endif // KMP_STATS_ENABLED
331}
332
344void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345 kmp_int32 num_teams, kmp_int32 num_threads) {
346 KA_TRACE(20,
347 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348 global_tid, num_teams, num_threads));
349 __kmp_assert_valid_gtid(global_tid);
350 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351}
352
369void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371 kmp_int32 num_threads) {
372 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373 " num_teams_ub=%d num_threads=%d\n",
374 global_tid, num_teams_lb, num_teams_ub, num_threads));
375 __kmp_assert_valid_gtid(global_tid);
376 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377 num_threads);
378}
379
390void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391 ...) {
392 int gtid = __kmp_entry_gtid();
393 kmp_info_t *this_thr = __kmp_threads[gtid];
394 va_list ap;
395 va_start(ap, microtask);
396
397#if KMP_STATS_ENABLED
398 KMP_COUNT_BLOCK(OMP_TEAMS);
399 stats_state_e previous_state = KMP_GET_THREAD_STATE();
400 if (previous_state == stats_state_e::SERIAL_REGION) {
401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402 } else {
403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404 }
405#endif
406
407 // remember teams entry point and nesting level
408 this_thr->th.th_teams_microtask = microtask;
409 this_thr->th.th_teams_level =
410 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411
412#if OMPT_SUPPORT
413 kmp_team_t *parent_team = this_thr->th.th_team;
414 int tid = __kmp_tid_from_gtid(gtid);
415 if (ompt_enabled.enabled) {
416 parent_team->t.t_implicit_task_taskdata[tid]
417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418 }
419 OMPT_STORE_RETURN_ADDRESS(gtid);
420#endif
421
422 // check if __kmpc_push_num_teams called, set default number of teams
423 // otherwise
424 if (this_thr->th.th_teams_size.nteams == 0) {
425 __kmp_push_num_teams(loc, gtid, 0, 0);
426 }
427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430
431 __kmp_fork_call(
432 loc, gtid, fork_context_intel, argc,
433 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435 __kmp_join_call(loc, gtid
436#if OMPT_SUPPORT
437 ,
438 fork_context_intel
439#endif
440 );
441
442 // Pop current CG root off list
443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445 this_thr->th.th_cg_roots = tmp->up;
446 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447 " to node %p. cg_nthreads was %d\n",
448 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450 int i = tmp->cg_nthreads--;
451 if (i == 1) { // check is we are the last thread in CG (not always the case)
452 __kmp_free(tmp);
453 }
454 // Restore current task's thread_limit from CG root
455 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456 this_thr->th.th_current_task->td_icvs.thread_limit =
457 this_thr->th.th_cg_roots->cg_thread_limit;
458
459 this_thr->th.th_teams_microtask = NULL;
460 this_thr->th.th_teams_level = 0;
461 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462 va_end(ap);
463#if KMP_STATS_ENABLED
464 if (previous_state == stats_state_e::SERIAL_REGION) {
465 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466 KMP_SET_THREAD_STATE(previous_state);
467 } else {
468 KMP_POP_PARTITIONED_TIMER();
469 }
470#endif // KMP_STATS_ENABLED
471}
472
473// I don't think this function should ever have been exported.
474// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475// openmp code ever called it, but it's been exported from the RTL for so
476// long that I'm afraid to remove the definition.
477int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478
491void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492 // The implementation is now in kmp_runtime.cpp so that it can share static
493 // functions with kmp_fork_call since the tasks to be done are similar in
494 // each case.
495 __kmp_assert_valid_gtid(global_tid);
496#if OMPT_SUPPORT
497 OMPT_STORE_RETURN_ADDRESS(global_tid);
498#endif
499 __kmp_serialized_parallel(loc, global_tid);
500}
501
509void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510 kmp_internal_control_t *top;
511 kmp_info_t *this_thr;
512 kmp_team_t *serial_team;
513
514 KC_TRACE(10,
515 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516
517 /* skip all this code for autopar serialized loops since it results in
518 unacceptable overhead */
519 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520 return;
521
522 // Not autopar code
523 __kmp_assert_valid_gtid(global_tid);
524 if (!TCR_4(__kmp_init_parallel))
525 __kmp_parallel_initialize();
526
527 __kmp_resume_if_soft_paused();
528
529 this_thr = __kmp_threads[global_tid];
530 serial_team = this_thr->th.th_serial_team;
531
532 kmp_task_team_t *task_team = this_thr->th.th_task_team;
533 // we need to wait for the proxy tasks before finishing the thread
534 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535 task_team->tt.tt_hidden_helper_task_encountered))
536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537
538 KMP_MB();
539 KMP_DEBUG_ASSERT(serial_team);
540 KMP_ASSERT(serial_team->t.t_serialized);
541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545
546#if OMPT_SUPPORT
547 if (ompt_enabled.enabled &&
548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550 if (ompt_enabled.ompt_callback_implicit_task) {
551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554 }
555
556 // reset clear the task id only after unlinking the task
557 ompt_data_t *parent_task_data;
558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559
560 if (ompt_enabled.ompt_callback_parallel_end) {
561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563 ompt_parallel_invoker_program | ompt_parallel_team,
564 OMPT_LOAD_RETURN_ADDRESS(global_tid));
565 }
566 __ompt_lw_taskteam_unlink(this_thr);
567 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568 }
569#endif
570
571 /* If necessary, pop the internal control stack values and replace the team
572 * values */
573 top = serial_team->t.t_control_stack_top;
574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576 serial_team->t.t_control_stack_top = top->next;
577 __kmp_free(top);
578 }
579
580 /* pop dispatch buffers stack */
581 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582 {
583 dispatch_private_info_t *disp_buffer =
584 serial_team->t.t_dispatch->th_disp_buffer;
585 serial_team->t.t_dispatch->th_disp_buffer =
586 serial_team->t.t_dispatch->th_disp_buffer->next;
587 __kmp_free(disp_buffer);
588 }
589 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590
591 --serial_team->t.t_serialized;
592 if (serial_team->t.t_serialized == 0) {
593
594 /* return to the parallel section */
595
596#if KMP_ARCH_X86 || KMP_ARCH_X86_64
597 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598 __kmp_clear_x87_fpu_status_word();
599 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601 }
602#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603
604 __kmp_pop_current_task_from_thread(this_thr);
605#if OMPD_SUPPORT
606 if (ompd_state & OMPD_ENABLE_BP)
607 ompd_bp_parallel_end();
608#endif
609
610 this_thr->th.th_team = serial_team->t.t_parent;
611 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612
613 /* restore values cached in the thread */
614 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615 this_thr->th.th_team_master =
616 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618
619 /* TODO the below shouldn't need to be adjusted for serialized teams */
620 this_thr->th.th_dispatch =
621 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622
623 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624 this_thr->th.th_current_task->td_flags.executing = 1;
625
626 if (__kmp_tasking_mode != tskm_immediate_exec) {
627 // Copy the task team from the new child / old parent team to the thread.
628 this_thr->th.th_task_team =
629 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630 KA_TRACE(20,
631 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632 "team %p\n",
633 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634 }
635#if KMP_AFFINITY_SUPPORTED
636 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
637 __kmp_reset_root_init_mask(global_tid);
638 }
639#endif
640 } else {
641 if (__kmp_tasking_mode != tskm_immediate_exec) {
642 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643 "depth of serial team %p to %d\n",
644 global_tid, serial_team, serial_team->t.t_serialized));
645 }
646 }
647
648 serial_team->t.t_level--;
649 if (__kmp_env_consistency_check)
650 __kmp_pop_parallel(global_tid, NULL);
651#if OMPT_SUPPORT
652 if (ompt_enabled.enabled)
653 this_thr->th.ompt_thread_info.state =
654 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
655 : ompt_state_work_parallel);
656#endif
657}
658
668 KC_TRACE(10, ("__kmpc_flush: called\n"));
669
670 /* need explicit __mf() here since use volatile instead in library */
671 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
672
673#if OMPT_SUPPORT && OMPT_OPTIONAL
674 if (ompt_enabled.ompt_callback_flush) {
675 ompt_callbacks.ompt_callback(ompt_callback_flush)(
676 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
677 }
678#endif
679}
680
681/* -------------------------------------------------------------------------- */
689void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
690 KMP_COUNT_BLOCK(OMP_BARRIER);
691 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
692 __kmp_assert_valid_gtid(global_tid);
693
694 if (!TCR_4(__kmp_init_parallel))
695 __kmp_parallel_initialize();
696
697 __kmp_resume_if_soft_paused();
698
699 if (__kmp_env_consistency_check) {
700 if (loc == 0) {
701 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702 }
703 __kmp_check_barrier(global_tid, ct_barrier, loc);
704 }
705
706#if OMPT_SUPPORT
707 ompt_frame_t *ompt_frame;
708 if (ompt_enabled.enabled) {
709 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
710 if (ompt_frame->enter_frame.ptr == NULL)
711 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
712 }
713 OMPT_STORE_RETURN_ADDRESS(global_tid);
714#endif
715 __kmp_threads[global_tid]->th.th_ident = loc;
716 // TODO: explicit barrier_wait_id:
717 // this function is called when 'barrier' directive is present or
718 // implicit barrier at the end of a worksharing construct.
719 // 1) better to add a per-thread barrier counter to a thread data structure
720 // 2) set to 0 when a new team is created
721 // 4) no sync is required
722
723 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
724#if OMPT_SUPPORT && OMPT_OPTIONAL
725 if (ompt_enabled.enabled) {
726 ompt_frame->enter_frame = ompt_data_none;
727 }
728#endif
729}
730
731/* The BARRIER for a MASTER section is always explicit */
738kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
739 int status = 0;
740
741 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
742 __kmp_assert_valid_gtid(global_tid);
743
744 if (!TCR_4(__kmp_init_parallel))
745 __kmp_parallel_initialize();
746
747 __kmp_resume_if_soft_paused();
748
749 if (KMP_MASTER_GTID(global_tid)) {
750 KMP_COUNT_BLOCK(OMP_MASTER);
751 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
752 status = 1;
753 }
754
755#if OMPT_SUPPORT && OMPT_OPTIONAL
756 if (status) {
757 if (ompt_enabled.ompt_callback_masked) {
758 kmp_info_t *this_thr = __kmp_threads[global_tid];
759 kmp_team_t *team = this_thr->th.th_team;
760
761 int tid = __kmp_tid_from_gtid(global_tid);
762 ompt_callbacks.ompt_callback(ompt_callback_masked)(
763 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
764 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
765 OMPT_GET_RETURN_ADDRESS(0));
766 }
767 }
768#endif
769
770 if (__kmp_env_consistency_check) {
771#if KMP_USE_DYNAMIC_LOCK
772 if (status)
773 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
774 else
775 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
776#else
777 if (status)
778 __kmp_push_sync(global_tid, ct_master, loc, NULL);
779 else
780 __kmp_check_sync(global_tid, ct_master, loc, NULL);
781#endif
782 }
783
784 return status;
785}
786
795void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
796 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
797 __kmp_assert_valid_gtid(global_tid);
798 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
799 KMP_POP_PARTITIONED_TIMER();
800
801#if OMPT_SUPPORT && OMPT_OPTIONAL
802 kmp_info_t *this_thr = __kmp_threads[global_tid];
803 kmp_team_t *team = this_thr->th.th_team;
804 if (ompt_enabled.ompt_callback_masked) {
805 int tid = __kmp_tid_from_gtid(global_tid);
806 ompt_callbacks.ompt_callback(ompt_callback_masked)(
807 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
808 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
809 OMPT_GET_RETURN_ADDRESS(0));
810 }
811#endif
812
813 if (__kmp_env_consistency_check) {
814 if (KMP_MASTER_GTID(global_tid))
815 __kmp_pop_sync(global_tid, ct_master, loc);
816 }
817}
818
827kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
828 int status = 0;
829 int tid;
830 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
831 __kmp_assert_valid_gtid(global_tid);
832
833 if (!TCR_4(__kmp_init_parallel))
834 __kmp_parallel_initialize();
835
836 __kmp_resume_if_soft_paused();
837
838 tid = __kmp_tid_from_gtid(global_tid);
839 if (tid == filter) {
840 KMP_COUNT_BLOCK(OMP_MASKED);
841 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
842 status = 1;
843 }
844
845#if OMPT_SUPPORT && OMPT_OPTIONAL
846 if (status) {
847 if (ompt_enabled.ompt_callback_masked) {
848 kmp_info_t *this_thr = __kmp_threads[global_tid];
849 kmp_team_t *team = this_thr->th.th_team;
850 ompt_callbacks.ompt_callback(ompt_callback_masked)(
851 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
852 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
853 OMPT_GET_RETURN_ADDRESS(0));
854 }
855 }
856#endif
857
858 if (__kmp_env_consistency_check) {
859#if KMP_USE_DYNAMIC_LOCK
860 if (status)
861 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
862 else
863 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
864#else
865 if (status)
866 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
867 else
868 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
869#endif
870 }
871
872 return status;
873}
874
883void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
884 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
885 __kmp_assert_valid_gtid(global_tid);
886 KMP_POP_PARTITIONED_TIMER();
887
888#if OMPT_SUPPORT && OMPT_OPTIONAL
889 kmp_info_t *this_thr = __kmp_threads[global_tid];
890 kmp_team_t *team = this_thr->th.th_team;
891 if (ompt_enabled.ompt_callback_masked) {
892 int tid = __kmp_tid_from_gtid(global_tid);
893 ompt_callbacks.ompt_callback(ompt_callback_masked)(
894 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
895 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
896 OMPT_GET_RETURN_ADDRESS(0));
897 }
898#endif
899
900 if (__kmp_env_consistency_check) {
901 __kmp_pop_sync(global_tid, ct_masked, loc);
902 }
903}
904
912void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
913 int cid = 0;
914 kmp_info_t *th;
915 KMP_DEBUG_ASSERT(__kmp_init_serial);
916
917 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
918 __kmp_assert_valid_gtid(gtid);
919
920 if (!TCR_4(__kmp_init_parallel))
921 __kmp_parallel_initialize();
922
923 __kmp_resume_if_soft_paused();
924
925#if USE_ITT_BUILD
926 __kmp_itt_ordered_prep(gtid);
927// TODO: ordered_wait_id
928#endif /* USE_ITT_BUILD */
929
930 th = __kmp_threads[gtid];
931
932#if OMPT_SUPPORT && OMPT_OPTIONAL
933 kmp_team_t *team;
934 ompt_wait_id_t lck;
935 void *codeptr_ra;
936 OMPT_STORE_RETURN_ADDRESS(gtid);
937 if (ompt_enabled.enabled) {
938 team = __kmp_team_from_gtid(gtid);
939 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
940 /* OMPT state update */
941 th->th.ompt_thread_info.wait_id = lck;
942 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
943
944 /* OMPT event callback */
945 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
946 if (ompt_enabled.ompt_callback_mutex_acquire) {
947 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
948 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
949 codeptr_ra);
950 }
951 }
952#endif
953
954 if (th->th.th_dispatch->th_deo_fcn != 0)
955 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
956 else
957 __kmp_parallel_deo(&gtid, &cid, loc);
958
959#if OMPT_SUPPORT && OMPT_OPTIONAL
960 if (ompt_enabled.enabled) {
961 /* OMPT state update */
962 th->th.ompt_thread_info.state = ompt_state_work_parallel;
963 th->th.ompt_thread_info.wait_id = 0;
964
965 /* OMPT event callback */
966 if (ompt_enabled.ompt_callback_mutex_acquired) {
967 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
968 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
969 }
970 }
971#endif
972
973#if USE_ITT_BUILD
974 __kmp_itt_ordered_start(gtid);
975#endif /* USE_ITT_BUILD */
976}
977
985void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
986 int cid = 0;
987 kmp_info_t *th;
988
989 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
990 __kmp_assert_valid_gtid(gtid);
991
992#if USE_ITT_BUILD
993 __kmp_itt_ordered_end(gtid);
994// TODO: ordered_wait_id
995#endif /* USE_ITT_BUILD */
996
997 th = __kmp_threads[gtid];
998
999 if (th->th.th_dispatch->th_dxo_fcn != 0)
1000 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1001 else
1002 __kmp_parallel_dxo(&gtid, &cid, loc);
1003
1004#if OMPT_SUPPORT && OMPT_OPTIONAL
1005 OMPT_STORE_RETURN_ADDRESS(gtid);
1006 if (ompt_enabled.ompt_callback_mutex_released) {
1007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1008 ompt_mutex_ordered,
1009 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1010 ->t.t_ordered.dt.t_value,
1011 OMPT_LOAD_RETURN_ADDRESS(gtid));
1012 }
1013#endif
1014}
1015
1016#if KMP_USE_DYNAMIC_LOCK
1017
1018static __forceinline void
1019__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1020 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1021 // Pointer to the allocated indirect lock is written to crit, while indexing
1022 // is ignored.
1023 void *idx;
1024 kmp_indirect_lock_t **lck;
1025 lck = (kmp_indirect_lock_t **)crit;
1026 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1027 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1028 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1029 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1030 KA_TRACE(20,
1031 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1032#if USE_ITT_BUILD
1033 __kmp_itt_critical_creating(ilk->lock, loc);
1034#endif
1035 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1036 if (status == 0) {
1037#if USE_ITT_BUILD
1038 __kmp_itt_critical_destroyed(ilk->lock);
1039#endif
1040 // We don't really need to destroy the unclaimed lock here since it will be
1041 // cleaned up at program exit.
1042 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1043 }
1044 KMP_DEBUG_ASSERT(*lck != NULL);
1045}
1046
1047// Fast-path acquire tas lock
1048#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1049 { \
1050 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1051 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1052 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1053 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1054 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1055 kmp_uint32 spins; \
1056 KMP_FSYNC_PREPARE(l); \
1057 KMP_INIT_YIELD(spins); \
1058 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1059 do { \
1060 if (TCR_4(__kmp_nth) > \
1061 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1062 KMP_YIELD(TRUE); \
1063 } else { \
1064 KMP_YIELD_SPIN(spins); \
1065 } \
1066 __kmp_spin_backoff(&backoff); \
1067 } while ( \
1068 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1069 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1070 } \
1071 KMP_FSYNC_ACQUIRED(l); \
1072 }
1073
1074// Fast-path test tas lock
1075#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1076 { \
1077 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1078 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1079 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1080 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1081 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1082 }
1083
1084// Fast-path release tas lock
1085#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1086 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1087
1088#if KMP_USE_FUTEX
1089
1090#include <sys/syscall.h>
1091#include <unistd.h>
1092#ifndef FUTEX_WAIT
1093#define FUTEX_WAIT 0
1094#endif
1095#ifndef FUTEX_WAKE
1096#define FUTEX_WAKE 1
1097#endif
1098
1099// Fast-path acquire futex lock
1100#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1101 { \
1102 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1103 kmp_int32 gtid_code = (gtid + 1) << 1; \
1104 KMP_MB(); \
1105 KMP_FSYNC_PREPARE(ftx); \
1106 kmp_int32 poll_val; \
1107 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1108 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1109 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1110 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1111 if (!cond) { \
1112 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1113 poll_val | \
1114 KMP_LOCK_BUSY(1, futex))) { \
1115 continue; \
1116 } \
1117 poll_val |= KMP_LOCK_BUSY(1, futex); \
1118 } \
1119 kmp_int32 rc; \
1120 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1121 NULL, NULL, 0)) != 0) { \
1122 continue; \
1123 } \
1124 gtid_code |= 1; \
1125 } \
1126 KMP_FSYNC_ACQUIRED(ftx); \
1127 }
1128
1129// Fast-path test futex lock
1130#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1131 { \
1132 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1133 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1134 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1135 KMP_FSYNC_ACQUIRED(ftx); \
1136 rc = TRUE; \
1137 } else { \
1138 rc = FALSE; \
1139 } \
1140 }
1141
1142// Fast-path release futex lock
1143#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1144 { \
1145 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1146 KMP_MB(); \
1147 KMP_FSYNC_RELEASING(ftx); \
1148 kmp_int32 poll_val = \
1149 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1150 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1151 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1152 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1153 } \
1154 KMP_MB(); \
1155 KMP_YIELD_OVERSUB(); \
1156 }
1157
1158#endif // KMP_USE_FUTEX
1159
1160#else // KMP_USE_DYNAMIC_LOCK
1161
1162static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1163 ident_t const *loc,
1164 kmp_int32 gtid) {
1165 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1166
1167 // Because of the double-check, the following load doesn't need to be volatile
1168 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1169
1170 if (lck == NULL) {
1171 void *idx;
1172
1173 // Allocate & initialize the lock.
1174 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1175 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1176 __kmp_init_user_lock_with_checks(lck);
1177 __kmp_set_user_lock_location(lck, loc);
1178#if USE_ITT_BUILD
1179 __kmp_itt_critical_creating(lck);
1180// __kmp_itt_critical_creating() should be called *before* the first usage
1181// of underlying lock. It is the only place where we can guarantee it. There
1182// are chances the lock will destroyed with no usage, but it is not a
1183// problem, because this is not real event seen by user but rather setting
1184// name for object (lock). See more details in kmp_itt.h.
1185#endif /* USE_ITT_BUILD */
1186
1187 // Use a cmpxchg instruction to slam the start of the critical section with
1188 // the lock pointer. If another thread beat us to it, deallocate the lock,
1189 // and use the lock that the other thread allocated.
1190 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1191
1192 if (status == 0) {
1193// Deallocate the lock and reload the value.
1194#if USE_ITT_BUILD
1195 __kmp_itt_critical_destroyed(lck);
1196// Let ITT know the lock is destroyed and the same memory location may be reused
1197// for another purpose.
1198#endif /* USE_ITT_BUILD */
1199 __kmp_destroy_user_lock_with_checks(lck);
1200 __kmp_user_lock_free(&idx, gtid, lck);
1201 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1202 KMP_DEBUG_ASSERT(lck != NULL);
1203 }
1204 }
1205 return lck;
1206}
1207
1208#endif // KMP_USE_DYNAMIC_LOCK
1209
1220void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1221 kmp_critical_name *crit) {
1222#if KMP_USE_DYNAMIC_LOCK
1223#if OMPT_SUPPORT && OMPT_OPTIONAL
1224 OMPT_STORE_RETURN_ADDRESS(global_tid);
1225#endif // OMPT_SUPPORT
1226 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1227#else
1228 KMP_COUNT_BLOCK(OMP_CRITICAL);
1229#if OMPT_SUPPORT && OMPT_OPTIONAL
1230 ompt_state_t prev_state = ompt_state_undefined;
1231 ompt_thread_info_t ti;
1232#endif
1233 kmp_user_lock_p lck;
1234
1235 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1236 __kmp_assert_valid_gtid(global_tid);
1237
1238 // TODO: add THR_OVHD_STATE
1239
1240 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1241 KMP_CHECK_USER_LOCK_INIT();
1242
1243 if ((__kmp_user_lock_kind == lk_tas) &&
1244 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1245 lck = (kmp_user_lock_p)crit;
1246 }
1247#if KMP_USE_FUTEX
1248 else if ((__kmp_user_lock_kind == lk_futex) &&
1249 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1250 lck = (kmp_user_lock_p)crit;
1251 }
1252#endif
1253 else { // ticket, queuing or drdpa
1254 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1255 }
1256
1257 if (__kmp_env_consistency_check)
1258 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1259
1260 // since the critical directive binds to all threads, not just the current
1261 // team we have to check this even if we are in a serialized team.
1262 // also, even if we are the uber thread, we still have to conduct the lock,
1263 // as we have to contend with sibling threads.
1264
1265#if USE_ITT_BUILD
1266 __kmp_itt_critical_acquiring(lck);
1267#endif /* USE_ITT_BUILD */
1268#if OMPT_SUPPORT && OMPT_OPTIONAL
1269 OMPT_STORE_RETURN_ADDRESS(gtid);
1270 void *codeptr_ra = NULL;
1271 if (ompt_enabled.enabled) {
1272 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1273 /* OMPT state update */
1274 prev_state = ti.state;
1275 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1276 ti.state = ompt_state_wait_critical;
1277
1278 /* OMPT event callback */
1279 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1280 if (ompt_enabled.ompt_callback_mutex_acquire) {
1281 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1282 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1283 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1284 }
1285 }
1286#endif
1287 // Value of 'crit' should be good for using as a critical_id of the critical
1288 // section directive.
1289 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1290
1291#if USE_ITT_BUILD
1292 __kmp_itt_critical_acquired(lck);
1293#endif /* USE_ITT_BUILD */
1294#if OMPT_SUPPORT && OMPT_OPTIONAL
1295 if (ompt_enabled.enabled) {
1296 /* OMPT state update */
1297 ti.state = prev_state;
1298 ti.wait_id = 0;
1299
1300 /* OMPT event callback */
1301 if (ompt_enabled.ompt_callback_mutex_acquired) {
1302 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1303 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1304 }
1305 }
1306#endif
1307 KMP_POP_PARTITIONED_TIMER();
1308
1309 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1310 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1311#endif // KMP_USE_DYNAMIC_LOCK
1312}
1313
1314#if KMP_USE_DYNAMIC_LOCK
1315
1316// Converts the given hint to an internal lock implementation
1317static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1318#if KMP_USE_TSX
1319#define KMP_TSX_LOCK(seq) lockseq_##seq
1320#else
1321#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1322#endif
1323
1324#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1325#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1326#else
1327#define KMP_CPUINFO_RTM 0
1328#endif
1329
1330 // Hints that do not require further logic
1331 if (hint & kmp_lock_hint_hle)
1332 return KMP_TSX_LOCK(hle);
1333 if (hint & kmp_lock_hint_rtm)
1334 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1335 if (hint & kmp_lock_hint_adaptive)
1336 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1337
1338 // Rule out conflicting hints first by returning the default lock
1339 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1340 return __kmp_user_lock_seq;
1341 if ((hint & omp_lock_hint_speculative) &&
1342 (hint & omp_lock_hint_nonspeculative))
1343 return __kmp_user_lock_seq;
1344
1345 // Do not even consider speculation when it appears to be contended
1346 if (hint & omp_lock_hint_contended)
1347 return lockseq_queuing;
1348
1349 // Uncontended lock without speculation
1350 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1351 return lockseq_tas;
1352
1353 // Use RTM lock for speculation
1354 if (hint & omp_lock_hint_speculative)
1355 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1356
1357 return __kmp_user_lock_seq;
1358}
1359
1360#if OMPT_SUPPORT && OMPT_OPTIONAL
1361#if KMP_USE_DYNAMIC_LOCK
1362static kmp_mutex_impl_t
1363__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1364 if (user_lock) {
1365 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1366 case 0:
1367 break;
1368#if KMP_USE_FUTEX
1369 case locktag_futex:
1370 return kmp_mutex_impl_queuing;
1371#endif
1372 case locktag_tas:
1373 return kmp_mutex_impl_spin;
1374#if KMP_USE_TSX
1375 case locktag_hle:
1376 case locktag_rtm_spin:
1377 return kmp_mutex_impl_speculative;
1378#endif
1379 default:
1380 return kmp_mutex_impl_none;
1381 }
1382 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1383 }
1384 KMP_ASSERT(ilock);
1385 switch (ilock->type) {
1386#if KMP_USE_TSX
1387 case locktag_adaptive:
1388 case locktag_rtm_queuing:
1389 return kmp_mutex_impl_speculative;
1390#endif
1391 case locktag_nested_tas:
1392 return kmp_mutex_impl_spin;
1393#if KMP_USE_FUTEX
1394 case locktag_nested_futex:
1395#endif
1396 case locktag_ticket:
1397 case locktag_queuing:
1398 case locktag_drdpa:
1399 case locktag_nested_ticket:
1400 case locktag_nested_queuing:
1401 case locktag_nested_drdpa:
1402 return kmp_mutex_impl_queuing;
1403 default:
1404 return kmp_mutex_impl_none;
1405 }
1406}
1407#else
1408// For locks without dynamic binding
1409static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1410 switch (__kmp_user_lock_kind) {
1411 case lk_tas:
1412 return kmp_mutex_impl_spin;
1413#if KMP_USE_FUTEX
1414 case lk_futex:
1415#endif
1416 case lk_ticket:
1417 case lk_queuing:
1418 case lk_drdpa:
1419 return kmp_mutex_impl_queuing;
1420#if KMP_USE_TSX
1421 case lk_hle:
1422 case lk_rtm_queuing:
1423 case lk_rtm_spin:
1424 case lk_adaptive:
1425 return kmp_mutex_impl_speculative;
1426#endif
1427 default:
1428 return kmp_mutex_impl_none;
1429 }
1430}
1431#endif // KMP_USE_DYNAMIC_LOCK
1432#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1433
1447void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1448 kmp_critical_name *crit, uint32_t hint) {
1449 KMP_COUNT_BLOCK(OMP_CRITICAL);
1450 kmp_user_lock_p lck;
1451#if OMPT_SUPPORT && OMPT_OPTIONAL
1452 ompt_state_t prev_state = ompt_state_undefined;
1453 ompt_thread_info_t ti;
1454 // This is the case, if called from __kmpc_critical:
1455 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1456 if (!codeptr)
1457 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1458#endif
1459
1460 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1461 __kmp_assert_valid_gtid(global_tid);
1462
1463 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1464 // Check if it is initialized.
1465 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1466 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1467 if (*lk == 0) {
1468 if (KMP_IS_D_LOCK(lockseq)) {
1469 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1470 KMP_GET_D_TAG(lockseq));
1471 } else {
1472 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1473 }
1474 }
1475 // Branch for accessing the actual lock object and set operation. This
1476 // branching is inevitable since this lock initialization does not follow the
1477 // normal dispatch path (lock table is not used).
1478 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1479 lck = (kmp_user_lock_p)lk;
1480 if (__kmp_env_consistency_check) {
1481 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1482 __kmp_map_hint_to_lock(hint));
1483 }
1484#if USE_ITT_BUILD
1485 __kmp_itt_critical_acquiring(lck);
1486#endif
1487#if OMPT_SUPPORT && OMPT_OPTIONAL
1488 if (ompt_enabled.enabled) {
1489 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1490 /* OMPT state update */
1491 prev_state = ti.state;
1492 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1493 ti.state = ompt_state_wait_critical;
1494
1495 /* OMPT event callback */
1496 if (ompt_enabled.ompt_callback_mutex_acquire) {
1497 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1498 ompt_mutex_critical, (unsigned int)hint,
1499 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1500 codeptr);
1501 }
1502 }
1503#endif
1504#if KMP_USE_INLINED_TAS
1505 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1506 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1507 } else
1508#elif KMP_USE_INLINED_FUTEX
1509 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1510 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1511 } else
1512#endif
1513 {
1514 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1515 }
1516 } else {
1517 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1518 lck = ilk->lock;
1519 if (__kmp_env_consistency_check) {
1520 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1521 __kmp_map_hint_to_lock(hint));
1522 }
1523#if USE_ITT_BUILD
1524 __kmp_itt_critical_acquiring(lck);
1525#endif
1526#if OMPT_SUPPORT && OMPT_OPTIONAL
1527 if (ompt_enabled.enabled) {
1528 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1529 /* OMPT state update */
1530 prev_state = ti.state;
1531 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1532 ti.state = ompt_state_wait_critical;
1533
1534 /* OMPT event callback */
1535 if (ompt_enabled.ompt_callback_mutex_acquire) {
1536 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1537 ompt_mutex_critical, (unsigned int)hint,
1538 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1539 codeptr);
1540 }
1541 }
1542#endif
1543 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1544 }
1545 KMP_POP_PARTITIONED_TIMER();
1546
1547#if USE_ITT_BUILD
1548 __kmp_itt_critical_acquired(lck);
1549#endif /* USE_ITT_BUILD */
1550#if OMPT_SUPPORT && OMPT_OPTIONAL
1551 if (ompt_enabled.enabled) {
1552 /* OMPT state update */
1553 ti.state = prev_state;
1554 ti.wait_id = 0;
1555
1556 /* OMPT event callback */
1557 if (ompt_enabled.ompt_callback_mutex_acquired) {
1558 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1559 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1560 }
1561 }
1562#endif
1563
1564 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1565 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1566} // __kmpc_critical_with_hint
1567
1568#endif // KMP_USE_DYNAMIC_LOCK
1569
1579void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1580 kmp_critical_name *crit) {
1581 kmp_user_lock_p lck;
1582
1583 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1584
1585#if KMP_USE_DYNAMIC_LOCK
1586 int locktag = KMP_EXTRACT_D_TAG(crit);
1587 if (locktag) {
1588 lck = (kmp_user_lock_p)crit;
1589 KMP_ASSERT(lck != NULL);
1590 if (__kmp_env_consistency_check) {
1591 __kmp_pop_sync(global_tid, ct_critical, loc);
1592 }
1593#if USE_ITT_BUILD
1594 __kmp_itt_critical_releasing(lck);
1595#endif
1596#if KMP_USE_INLINED_TAS
1597 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1598 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1599 } else
1600#elif KMP_USE_INLINED_FUTEX
1601 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1602 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1603 } else
1604#endif
1605 {
1606 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1607 }
1608 } else {
1609 kmp_indirect_lock_t *ilk =
1610 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1611 KMP_ASSERT(ilk != NULL);
1612 lck = ilk->lock;
1613 if (__kmp_env_consistency_check) {
1614 __kmp_pop_sync(global_tid, ct_critical, loc);
1615 }
1616#if USE_ITT_BUILD
1617 __kmp_itt_critical_releasing(lck);
1618#endif
1619 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1620 }
1621
1622#else // KMP_USE_DYNAMIC_LOCK
1623
1624 if ((__kmp_user_lock_kind == lk_tas) &&
1625 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1626 lck = (kmp_user_lock_p)crit;
1627 }
1628#if KMP_USE_FUTEX
1629 else if ((__kmp_user_lock_kind == lk_futex) &&
1630 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1631 lck = (kmp_user_lock_p)crit;
1632 }
1633#endif
1634 else { // ticket, queuing or drdpa
1635 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1636 }
1637
1638 KMP_ASSERT(lck != NULL);
1639
1640 if (__kmp_env_consistency_check)
1641 __kmp_pop_sync(global_tid, ct_critical, loc);
1642
1643#if USE_ITT_BUILD
1644 __kmp_itt_critical_releasing(lck);
1645#endif /* USE_ITT_BUILD */
1646 // Value of 'crit' should be good for using as a critical_id of the critical
1647 // section directive.
1648 __kmp_release_user_lock_with_checks(lck, global_tid);
1649
1650#endif // KMP_USE_DYNAMIC_LOCK
1651
1652#if OMPT_SUPPORT && OMPT_OPTIONAL
1653 /* OMPT release event triggers after lock is released; place here to trigger
1654 * for all #if branches */
1655 OMPT_STORE_RETURN_ADDRESS(global_tid);
1656 if (ompt_enabled.ompt_callback_mutex_released) {
1657 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1658 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1659 OMPT_LOAD_RETURN_ADDRESS(0));
1660 }
1661#endif
1662
1663 KMP_POP_PARTITIONED_TIMER();
1664 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1665}
1666
1676kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1677 int status;
1678 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1679 __kmp_assert_valid_gtid(global_tid);
1680
1681 if (!TCR_4(__kmp_init_parallel))
1682 __kmp_parallel_initialize();
1683
1684 __kmp_resume_if_soft_paused();
1685
1686 if (__kmp_env_consistency_check)
1687 __kmp_check_barrier(global_tid, ct_barrier, loc);
1688
1689#if OMPT_SUPPORT
1690 ompt_frame_t *ompt_frame;
1691 if (ompt_enabled.enabled) {
1692 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1693 if (ompt_frame->enter_frame.ptr == NULL)
1694 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1695 }
1696 OMPT_STORE_RETURN_ADDRESS(global_tid);
1697#endif
1698#if USE_ITT_NOTIFY
1699 __kmp_threads[global_tid]->th.th_ident = loc;
1700#endif
1701 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1702#if OMPT_SUPPORT && OMPT_OPTIONAL
1703 if (ompt_enabled.enabled) {
1704 ompt_frame->enter_frame = ompt_data_none;
1705 }
1706#endif
1707
1708 return (status != 0) ? 0 : 1;
1709}
1710
1720void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1721 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1722 __kmp_assert_valid_gtid(global_tid);
1723 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1724}
1725
1736kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1737 kmp_int32 ret;
1738 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1739 __kmp_assert_valid_gtid(global_tid);
1740
1741 if (!TCR_4(__kmp_init_parallel))
1742 __kmp_parallel_initialize();
1743
1744 __kmp_resume_if_soft_paused();
1745
1746 if (__kmp_env_consistency_check) {
1747 if (loc == 0) {
1748 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1749 }
1750 __kmp_check_barrier(global_tid, ct_barrier, loc);
1751 }
1752
1753#if OMPT_SUPPORT
1754 ompt_frame_t *ompt_frame;
1755 if (ompt_enabled.enabled) {
1756 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1757 if (ompt_frame->enter_frame.ptr == NULL)
1758 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1759 }
1760 OMPT_STORE_RETURN_ADDRESS(global_tid);
1761#endif
1762#if USE_ITT_NOTIFY
1763 __kmp_threads[global_tid]->th.th_ident = loc;
1764#endif
1765 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1766#if OMPT_SUPPORT && OMPT_OPTIONAL
1767 if (ompt_enabled.enabled) {
1768 ompt_frame->enter_frame = ompt_data_none;
1769 }
1770#endif
1771
1772 ret = __kmpc_master(loc, global_tid);
1773
1774 if (__kmp_env_consistency_check) {
1775 /* there's no __kmpc_end_master called; so the (stats) */
1776 /* actions of __kmpc_end_master are done here */
1777 if (ret) {
1778 /* only one thread should do the pop since only */
1779 /* one did the push (see __kmpc_master()) */
1780 __kmp_pop_sync(global_tid, ct_master, loc);
1781 }
1782 }
1783
1784 return (ret);
1785}
1786
1787/* The BARRIER for a SINGLE process section is always explicit */
1799kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1800 __kmp_assert_valid_gtid(global_tid);
1801 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1802
1803 if (rc) {
1804 // We are going to execute the single statement, so we should count it.
1805 KMP_COUNT_BLOCK(OMP_SINGLE);
1806 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1807 }
1808
1809#if OMPT_SUPPORT && OMPT_OPTIONAL
1810 kmp_info_t *this_thr = __kmp_threads[global_tid];
1811 kmp_team_t *team = this_thr->th.th_team;
1812 int tid = __kmp_tid_from_gtid(global_tid);
1813
1814 if (ompt_enabled.enabled) {
1815 if (rc) {
1816 if (ompt_enabled.ompt_callback_work) {
1817 ompt_callbacks.ompt_callback(ompt_callback_work)(
1818 ompt_work_single_executor, ompt_scope_begin,
1819 &(team->t.ompt_team_info.parallel_data),
1820 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1821 1, OMPT_GET_RETURN_ADDRESS(0));
1822 }
1823 } else {
1824 if (ompt_enabled.ompt_callback_work) {
1825 ompt_callbacks.ompt_callback(ompt_callback_work)(
1826 ompt_work_single_other, ompt_scope_begin,
1827 &(team->t.ompt_team_info.parallel_data),
1828 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1829 1, OMPT_GET_RETURN_ADDRESS(0));
1830 ompt_callbacks.ompt_callback(ompt_callback_work)(
1831 ompt_work_single_other, ompt_scope_end,
1832 &(team->t.ompt_team_info.parallel_data),
1833 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1834 1, OMPT_GET_RETURN_ADDRESS(0));
1835 }
1836 }
1837 }
1838#endif
1839
1840 return rc;
1841}
1842
1852void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1853 __kmp_assert_valid_gtid(global_tid);
1854 __kmp_exit_single(global_tid);
1855 KMP_POP_PARTITIONED_TIMER();
1856
1857#if OMPT_SUPPORT && OMPT_OPTIONAL
1858 kmp_info_t *this_thr = __kmp_threads[global_tid];
1859 kmp_team_t *team = this_thr->th.th_team;
1860 int tid = __kmp_tid_from_gtid(global_tid);
1861
1862 if (ompt_enabled.ompt_callback_work) {
1863 ompt_callbacks.ompt_callback(ompt_callback_work)(
1864 ompt_work_single_executor, ompt_scope_end,
1865 &(team->t.ompt_team_info.parallel_data),
1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1867 OMPT_GET_RETURN_ADDRESS(0));
1868 }
1869#endif
1870}
1871
1879void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1880 KMP_POP_PARTITIONED_TIMER();
1881 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1882
1883#if OMPT_SUPPORT && OMPT_OPTIONAL
1884 if (ompt_enabled.ompt_callback_work) {
1885 ompt_work_t ompt_work_type = ompt_work_loop;
1886 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1887 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1888 // Determine workshare type
1889 if (loc != NULL) {
1890 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1891 ompt_work_type = ompt_work_loop;
1892 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1893 ompt_work_type = ompt_work_sections;
1894 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1895 ompt_work_type = ompt_work_distribute;
1896 } else {
1897 // use default set above.
1898 // a warning about this case is provided in __kmpc_for_static_init
1899 }
1900 KMP_DEBUG_ASSERT(ompt_work_type);
1901 }
1902 ompt_callbacks.ompt_callback(ompt_callback_work)(
1903 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1904 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1905 }
1906#endif
1907 if (__kmp_env_consistency_check)
1908 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1909}
1910
1911// User routines which take C-style arguments (call by value)
1912// different from the Fortran equivalent routines
1913
1914void ompc_set_num_threads(int arg) {
1915 // !!!!! TODO: check the per-task binding
1916 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1917}
1918
1919void ompc_set_dynamic(int flag) {
1920 kmp_info_t *thread;
1921
1922 /* For the thread-private implementation of the internal controls */
1923 thread = __kmp_entry_thread();
1924
1925 __kmp_save_internal_controls(thread);
1926
1927 set__dynamic(thread, flag ? true : false);
1928}
1929
1930void ompc_set_nested(int flag) {
1931 kmp_info_t *thread;
1932
1933 /* For the thread-private internal controls implementation */
1934 thread = __kmp_entry_thread();
1935
1936 __kmp_save_internal_controls(thread);
1937
1938 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1939}
1940
1941void ompc_set_max_active_levels(int max_active_levels) {
1942 /* TO DO */
1943 /* we want per-task implementation of this internal control */
1944
1945 /* For the per-thread internal controls implementation */
1946 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1947}
1948
1949void ompc_set_schedule(omp_sched_t kind, int modifier) {
1950 // !!!!! TODO: check the per-task binding
1951 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1952}
1953
1954int ompc_get_ancestor_thread_num(int level) {
1955 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1956}
1957
1958int ompc_get_team_size(int level) {
1959 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1960}
1961
1962/* OpenMP 5.0 Affinity Format API */
1963void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
1964 if (!__kmp_init_serial) {
1965 __kmp_serial_initialize();
1966 }
1967 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1968 format, KMP_STRLEN(format) + 1);
1969}
1970
1971size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
1972 size_t format_size;
1973 if (!__kmp_init_serial) {
1974 __kmp_serial_initialize();
1975 }
1976 format_size = KMP_STRLEN(__kmp_affinity_format);
1977 if (buffer && size) {
1978 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1979 format_size + 1);
1980 }
1981 return format_size;
1982}
1983
1984void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
1985 int gtid;
1986 if (!TCR_4(__kmp_init_middle)) {
1987 __kmp_middle_initialize();
1988 }
1989 __kmp_assign_root_init_mask();
1990 gtid = __kmp_get_gtid();
1991#if KMP_AFFINITY_SUPPORTED
1992 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
1993 __kmp_affinity.flags.reset) {
1994 __kmp_reset_root_init_mask(gtid);
1995 }
1996#endif
1997 __kmp_aux_display_affinity(gtid, format);
1998}
1999
2000size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2001 char const *format) {
2002 int gtid;
2003 size_t num_required;
2004 kmp_str_buf_t capture_buf;
2005 if (!TCR_4(__kmp_init_middle)) {
2006 __kmp_middle_initialize();
2007 }
2008 __kmp_assign_root_init_mask();
2009 gtid = __kmp_get_gtid();
2010#if KMP_AFFINITY_SUPPORTED
2011 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2012 __kmp_affinity.flags.reset) {
2013 __kmp_reset_root_init_mask(gtid);
2014 }
2015#endif
2016 __kmp_str_buf_init(&capture_buf);
2017 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2018 if (buffer && buf_size) {
2019 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2020 capture_buf.used + 1);
2021 }
2022 __kmp_str_buf_free(&capture_buf);
2023 return num_required;
2024}
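// Illustrative usage (not part of the runtime) of the OpenMP 5.0 affinity
// format API backed by the two routines above. The return value is the number
// of characters needed, which enables a probe-then-capture sizing idiom; the
// NULL-buffer probe relies on the (buffer && size) guard in this
// implementation, and the format fields shown are assumptions -- consult the
// OpenMP specification for the full field list.
//
//   #include <omp.h>
//   #include <stdio.h>
//   #include <stdlib.h>
//
//   int main(void) {
//     omp_set_affinity_format("tid=%n affinity=%A");
//   #pragma omp parallel
//     {
//       size_t needed = omp_capture_affinity(NULL, 0, NULL); // probe size
//       char *buf = (char *)malloc(needed + 1);
//       if (buf) {
//         omp_capture_affinity(buf, needed + 1, NULL);       // capture
//         printf("%s\n", buf);
//         free(buf);
//       }
//     }
//     return 0;
//   }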
2025
2026void kmpc_set_stacksize(int arg) {
2027 // __kmp_aux_set_stacksize initializes the library if needed
2028 __kmp_aux_set_stacksize(arg);
2029}
2030
2031void kmpc_set_stacksize_s(size_t arg) {
2032 // __kmp_aux_set_stacksize initializes the library if needed
2033 __kmp_aux_set_stacksize(arg);
2034}
2035
2036void kmpc_set_blocktime(int arg) {
2037 int gtid, tid;
2038 kmp_info_t *thread;
2039
2040 gtid = __kmp_entry_gtid();
2041 tid = __kmp_tid_from_gtid(gtid);
2042 thread = __kmp_thread_from_gtid(gtid);
2043
2044 __kmp_aux_set_blocktime(arg, thread, tid);
2045}
2046
2047void kmpc_set_library(int arg) {
2048 // __kmp_user_set_library initializes the library if needed
2049 __kmp_user_set_library((enum library_type)arg);
2050}
2051
2052void kmpc_set_defaults(char const *str) {
2053 // __kmp_aux_set_defaults initializes the library if needed
2054 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2055}
2056
2057void kmpc_set_disp_num_buffers(int arg) {
2058 // ignore after initialization because some teams have already
2059 // allocated dispatch buffers
2060 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2061 arg <= KMP_MAX_DISP_NUM_BUFF) {
2062 __kmp_dispatch_num_buffers = arg;
2063 }
2064}
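// Illustrative usage (not part of the runtime): the kmpc_set_* entry points
// above typically sit behind the kmp_* extension API declared in this
// runtime's omp.h (the exact set of exported kmp_* wrappers is an assumption
// -- check the omp.h shipped with your build). The dispatch-buffer count is
// only honored before serial initialization, so such calls belong ahead of
// the first parallel region.
//
//   #include <omp.h>
//
//   int main(void) {
//     kmp_set_defaults("KMP_AFFINITY=compact"); // -> kmpc_set_defaults
//     kmp_set_stacksize_s(4 * 1024 * 1024);     // -> kmpc_set_stacksize_s
//     kmp_set_blocktime(0);                     // -> kmpc_set_blocktime
//     kmp_set_disp_num_buffers(8);              // -> kmpc_set_disp_num_buffers
//   #pragma omp parallel
//     { /* library initializes here with the values set above */ }
//     return 0;
//   }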
2065
2066int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2067#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2068 return -1;
2069#else
2070 if (!TCR_4(__kmp_init_middle)) {
2071 __kmp_middle_initialize();
2072 }
2073 __kmp_assign_root_init_mask();
2074 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2075#endif
2076}
2077
2078int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2079#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2080 return -1;
2081#else
2082 if (!TCR_4(__kmp_init_middle)) {
2083 __kmp_middle_initialize();
2084 }
2085 __kmp_assign_root_init_mask();
2086 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2087#endif
2088}
2089
2090int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2091#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2092 return -1;
2093#else
2094 if (!TCR_4(__kmp_init_middle)) {
2095 __kmp_middle_initialize();
2096 }
2097 __kmp_assign_root_init_mask();
2098 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2099#endif
2100}
2101
2102/* -------------------------------------------------------------------------- */
2147void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2148 void *cpy_data, void (*cpy_func)(void *, void *),
2149 kmp_int32 didit) {
2150 void **data_ptr;
2151 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2152 __kmp_assert_valid_gtid(gtid);
2153
2154 KMP_MB();
2155
2156 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2157
2158 if (__kmp_env_consistency_check) {
2159 if (loc == 0) {
2160 KMP_WARNING(ConstructIdentInvalid);
2161 }
2162 }
2163
2164 // ToDo: Optimize the following two barriers into some kind of split barrier
2165
2166 if (didit)
2167 *data_ptr = cpy_data;
2168
2169#if OMPT_SUPPORT
2170 ompt_frame_t *ompt_frame;
2171 if (ompt_enabled.enabled) {
2172 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2173 if (ompt_frame->enter_frame.ptr == NULL)
2174 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2175 }
2176 OMPT_STORE_RETURN_ADDRESS(gtid);
2177#endif
2178/* This barrier is not a barrier region boundary */
2179#if USE_ITT_NOTIFY
2180 __kmp_threads[gtid]->th.th_ident = loc;
2181#endif
2182 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2183
2184 if (!didit)
2185 (*cpy_func)(cpy_data, *data_ptr);
2186
2187 // Consider next barrier a user-visible barrier for barrier region boundaries
2188 // Nesting checks are already handled by the single construct checks
2189 {
2190#if OMPT_SUPPORT
2191 OMPT_STORE_RETURN_ADDRESS(gtid);
2192#endif
2193#if USE_ITT_NOTIFY
2194 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2195// tasks can overwrite the location)
2196#endif
2197 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2198#if OMPT_SUPPORT && OMPT_OPTIONAL
2199 if (ompt_enabled.enabled) {
2200 ompt_frame->enter_frame = ompt_data_none;
2201 }
2202#endif
2203 }
2204}
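// Minimal sketch (an assumption about typical lowering, not any compiler's
// verbatim output) of how __kmpc_copyprivate is driven for
//     #pragma omp single copyprivate(x)
// Each thread passes the address of its private x; only the thread that
// executed the single block passes didit != 0, and the other threads receive
// the value through cpy_func, which is called with (destination, source).
//
//   static void copy_x(void *dst, void *src) {      // cpy_func
//     *(int *)dst = *(int *)src;
//   }
//   ...
//   int x;                                          // private to each thread
//   kmp_int32 didit = __kmpc_single(loc, gtid);     // 1 on the chosen thread
//   if (didit) {
//     x = produce_value();                          // hypothetical helper
//     __kmpc_end_single(loc, gtid);
//   }
//   __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);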
2205
2206/* --------------------------------------------------------------------------*/
2223void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2224 void **data_ptr;
2225
2226 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2227
2228 KMP_MB();
2229
2230 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2231
2232 if (__kmp_env_consistency_check) {
2233 if (loc == 0) {
2234 KMP_WARNING(ConstructIdentInvalid);
2235 }
2236 }
2237
2238 // ToDo: Optimize the following barrier
2239
2240 if (cpy_data)
2241 *data_ptr = cpy_data;
2242
2243#if OMPT_SUPPORT
2244 ompt_frame_t *ompt_frame;
2245 if (ompt_enabled.enabled) {
2246 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2247 if (ompt_frame->enter_frame.ptr == NULL)
2248 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2249 OMPT_STORE_RETURN_ADDRESS(gtid);
2250 }
2251#endif
2252/* This barrier is not a barrier region boundary */
2253#if USE_ITT_NOTIFY
2254 __kmp_threads[gtid]->th.th_ident = loc;
2255#endif
2256 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2257
2258 return *data_ptr;
2259}
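// Illustrative contrast with __kmpc_copyprivate above (an assumed usage
// pattern, not mandated lowering): the executing thread passes the address of
// its data, every other thread passes NULL, and after the single barrier all
// threads get the executing thread's pointer back and do the copy themselves
// -- no callback and only one barrier. The construct's closing barrier (not
// shown) must still separate those reads from later writes by the source.
//
//   void *src = didit ? (void *)&x : NULL;
//   int *shared_x = (int *)__kmpc_copyprivate_light(loc, gtid, src);
//   if (!didit)
//     x = *shared_x;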
2260
2261/* -------------------------------------------------------------------------- */
2262
2263#define INIT_LOCK __kmp_init_user_lock_with_checks
2264#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2265#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2266#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2267#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2268#define ACQUIRE_NESTED_LOCK_TIMED \
2269 __kmp_acquire_nested_user_lock_with_checks_timed
2270#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2271#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2272#define TEST_LOCK __kmp_test_user_lock_with_checks
2273#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2274#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2275#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2276
2277// TODO: Make check abort messages use location info & pass it into
2278// with_checks routines
2279
2280#if KMP_USE_DYNAMIC_LOCK
2281
2282// internal lock initializer
2283static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2284 kmp_dyna_lockseq_t seq) {
2285 if (KMP_IS_D_LOCK(seq)) {
2286 KMP_INIT_D_LOCK(lock, seq);
2287#if USE_ITT_BUILD
2288 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2289#endif
2290 } else {
2291 KMP_INIT_I_LOCK(lock, seq);
2292#if USE_ITT_BUILD
2293 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2294 __kmp_itt_lock_creating(ilk->lock, loc);
2295#endif
2296 }
2297}
2298
2299// internal nest lock initializer
2300static __forceinline void
2301__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2302 kmp_dyna_lockseq_t seq) {
2303#if KMP_USE_TSX
2304 // Don't have nested lock implementation for speculative locks
2305 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2306 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2307 seq = __kmp_user_lock_seq;
2308#endif
2309 switch (seq) {
2310 case lockseq_tas:
2311 seq = lockseq_nested_tas;
2312 break;
2313#if KMP_USE_FUTEX
2314 case lockseq_futex:
2315 seq = lockseq_nested_futex;
2316 break;
2317#endif
2318 case lockseq_ticket:
2319 seq = lockseq_nested_ticket;
2320 break;
2321 case lockseq_queuing:
2322 seq = lockseq_nested_queuing;
2323 break;
2324 case lockseq_drdpa:
2325 seq = lockseq_nested_drdpa;
2326 break;
2327 default:
2328 seq = lockseq_nested_queuing;
2329 }
2330 KMP_INIT_I_LOCK(lock, seq);
2331#if USE_ITT_BUILD
2332 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2333 __kmp_itt_lock_creating(ilk->lock, loc);
2334#endif
2335}
2336
2337/* initialize the lock with a hint */
2338void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2339 uintptr_t hint) {
2340 KMP_DEBUG_ASSERT(__kmp_init_serial);
2341 if (__kmp_env_consistency_check && user_lock == NULL) {
2342 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2343 }
2344
2345 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2346
2347#if OMPT_SUPPORT && OMPT_OPTIONAL
2348 // This is the case, if called from omp_init_lock_with_hint:
2349 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2350 if (!codeptr)
2351 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2352 if (ompt_enabled.ompt_callback_lock_init) {
2353 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2354 ompt_mutex_lock, (omp_lock_hint_t)hint,
2355 __ompt_get_mutex_impl_type(user_lock),
2356 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2357 }
2358#endif
2359}
2360
2361/* initialize the lock with a hint */
2362void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2363 void **user_lock, uintptr_t hint) {
2364 KMP_DEBUG_ASSERT(__kmp_init_serial);
2365 if (__kmp_env_consistency_check && user_lock == NULL) {
2366 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2367 }
2368
2369 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2370
2371#if OMPT_SUPPORT && OMPT_OPTIONAL
2372 // This is the case, if called from omp_init_lock_with_hint:
2373 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2374 if (!codeptr)
2375 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2376 if (ompt_enabled.ompt_callback_lock_init) {
2377 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2378 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2379 __ompt_get_mutex_impl_type(user_lock),
2380 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2381 }
2382#endif
2383}
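// Illustrative usage (not part of the runtime): the hinted-lock API from
// omp.h (OpenMP 4.5+) that funnels into the two initializers above. The hints
// chosen here are just examples; the runtime maps each hint to a lock
// sequence via __kmp_map_hint_to_lock.
//
//   #include <omp.h>
//
//   omp_lock_t lk;
//   omp_nest_lock_t nlk;
//
//   void init_locks(void) {
//     omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
//     omp_init_nest_lock_with_hint(&nlk, omp_lock_hint_contended);
//   }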
2384
2385#endif // KMP_USE_DYNAMIC_LOCK
2386
2387/* initialize the lock */
2388void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2389#if KMP_USE_DYNAMIC_LOCK
2390
2391 KMP_DEBUG_ASSERT(__kmp_init_serial);
2392 if (__kmp_env_consistency_check && user_lock == NULL) {
2393 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2394 }
2395 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2396
2397#if OMPT_SUPPORT && OMPT_OPTIONAL
2398 // This is the case, if called from omp_init_lock_with_hint:
2399 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2400 if (!codeptr)
2401 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2402 if (ompt_enabled.ompt_callback_lock_init) {
2403 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2404 ompt_mutex_lock, omp_lock_hint_none,
2405 __ompt_get_mutex_impl_type(user_lock),
2406 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2407 }
2408#endif
2409
2410#else // KMP_USE_DYNAMIC_LOCK
2411
2412 static char const *const func = "omp_init_lock";
2413 kmp_user_lock_p lck;
2414 KMP_DEBUG_ASSERT(__kmp_init_serial);
2415
2416 if (__kmp_env_consistency_check) {
2417 if (user_lock == NULL) {
2418 KMP_FATAL(LockIsUninitialized, func);
2419 }
2420 }
2421
2422 KMP_CHECK_USER_LOCK_INIT();
2423
2424 if ((__kmp_user_lock_kind == lk_tas) &&
2425 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2426 lck = (kmp_user_lock_p)user_lock;
2427 }
2428#if KMP_USE_FUTEX
2429 else if ((__kmp_user_lock_kind == lk_futex) &&
2430 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2431 lck = (kmp_user_lock_p)user_lock;
2432 }
2433#endif
2434 else {
2435 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2436 }
2437 INIT_LOCK(lck);
2438 __kmp_set_user_lock_location(lck, loc);
2439
2440#if OMPT_SUPPORT && OMPT_OPTIONAL
2441 // This is the case, if called from omp_init_lock_with_hint:
2442 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2443 if (!codeptr)
2444 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2445 if (ompt_enabled.ompt_callback_lock_init) {
2446 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2447 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449 }
2450#endif
2451
2452#if USE_ITT_BUILD
2453 __kmp_itt_lock_creating(lck);
2454#endif /* USE_ITT_BUILD */
2455
2456#endif // KMP_USE_DYNAMIC_LOCK
2457} // __kmpc_init_lock
2458
2459/* initialize the lock */
2460void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2461#if KMP_USE_DYNAMIC_LOCK
2462
2463 KMP_DEBUG_ASSERT(__kmp_init_serial);
2464 if (__kmp_env_consistency_check && user_lock == NULL) {
2465 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2466 }
2467 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2468
2469#if OMPT_SUPPORT && OMPT_OPTIONAL
2470 // This is the case, if called from omp_init_lock_with_hint:
2471 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2472 if (!codeptr)
2473 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2474 if (ompt_enabled.ompt_callback_lock_init) {
2475 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2476 ompt_mutex_nest_lock, omp_lock_hint_none,
2477 __ompt_get_mutex_impl_type(user_lock),
2478 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2479 }
2480#endif
2481
2482#else // KMP_USE_DYNAMIC_LOCK
2483
2484 static char const *const func = "omp_init_nest_lock";
2485 kmp_user_lock_p lck;
2486 KMP_DEBUG_ASSERT(__kmp_init_serial);
2487
2488 if (__kmp_env_consistency_check) {
2489 if (user_lock == NULL) {
2490 KMP_FATAL(LockIsUninitialized, func);
2491 }
2492 }
2493
2494 KMP_CHECK_USER_LOCK_INIT();
2495
2496 if ((__kmp_user_lock_kind == lk_tas) &&
2497 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2498 OMP_NEST_LOCK_T_SIZE)) {
2499 lck = (kmp_user_lock_p)user_lock;
2500 }
2501#if KMP_USE_FUTEX
2502 else if ((__kmp_user_lock_kind == lk_futex) &&
2503 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2504 OMP_NEST_LOCK_T_SIZE)) {
2505 lck = (kmp_user_lock_p)user_lock;
2506 }
2507#endif
2508 else {
2509 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2510 }
2511
2512 INIT_NESTED_LOCK(lck);
2513 __kmp_set_user_lock_location(lck, loc);
2514
2515#if OMPT_SUPPORT && OMPT_OPTIONAL
2516 // This is the case, if called from omp_init_lock_with_hint:
2517 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2518 if (!codeptr)
2519 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2520 if (ompt_enabled.ompt_callback_lock_init) {
2521 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2522 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2523 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2524 }
2525#endif
2526
2527#if USE_ITT_BUILD
2528 __kmp_itt_lock_creating(lck);
2529#endif /* USE_ITT_BUILD */
2530
2531#endif // KMP_USE_DYNAMIC_LOCK
2532} // __kmpc_init_nest_lock
2533
2534void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2535#if KMP_USE_DYNAMIC_LOCK
2536
2537#if USE_ITT_BUILD
2538 kmp_user_lock_p lck;
2539 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2540 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2541 } else {
2542 lck = (kmp_user_lock_p)user_lock;
2543 }
2544 __kmp_itt_lock_destroyed(lck);
2545#endif
2546#if OMPT_SUPPORT && OMPT_OPTIONAL
2547 // This is the case, if called from omp_init_lock_with_hint:
2548 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2549 if (!codeptr)
2550 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2551 if (ompt_enabled.ompt_callback_lock_destroy) {
2552 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2553 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2554 }
2555#endif
2556 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2557#else
2558 kmp_user_lock_p lck;
2559
2560 if ((__kmp_user_lock_kind == lk_tas) &&
2561 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2562 lck = (kmp_user_lock_p)user_lock;
2563 }
2564#if KMP_USE_FUTEX
2565 else if ((__kmp_user_lock_kind == lk_futex) &&
2566 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2567 lck = (kmp_user_lock_p)user_lock;
2568 }
2569#endif
2570 else {
2571 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2572 }
2573
2574#if OMPT_SUPPORT && OMPT_OPTIONAL
2575 // This is the case, if called from omp_init_lock_with_hint:
2576 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2577 if (!codeptr)
2578 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2579 if (ompt_enabled.ompt_callback_lock_destroy) {
2580 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2581 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2582 }
2583#endif
2584
2585#if USE_ITT_BUILD
2586 __kmp_itt_lock_destroyed(lck);
2587#endif /* USE_ITT_BUILD */
2588 DESTROY_LOCK(lck);
2589
2590 if ((__kmp_user_lock_kind == lk_tas) &&
2591 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2592 ;
2593 }
2594#if KMP_USE_FUTEX
2595 else if ((__kmp_user_lock_kind == lk_futex) &&
2596 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2597 ;
2598 }
2599#endif
2600 else {
2601 __kmp_user_lock_free(user_lock, gtid, lck);
2602 }
2603#endif // KMP_USE_DYNAMIC_LOCK
2604} // __kmpc_destroy_lock
2605
2606/* destroy the lock */
2607void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2608#if KMP_USE_DYNAMIC_LOCK
2609
2610#if USE_ITT_BUILD
2611 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2612 __kmp_itt_lock_destroyed(ilk->lock);
2613#endif
2614#if OMPT_SUPPORT && OMPT_OPTIONAL
2615 // This is the case, if called from omp_init_lock_with_hint:
2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2617 if (!codeptr)
2618 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2619 if (ompt_enabled.ompt_callback_lock_destroy) {
2620 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2621 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2622 }
2623#endif
2624 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2625
2626#else // KMP_USE_DYNAMIC_LOCK
2627
2628 kmp_user_lock_p lck;
2629
2630 if ((__kmp_user_lock_kind == lk_tas) &&
2631 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2632 OMP_NEST_LOCK_T_SIZE)) {
2633 lck = (kmp_user_lock_p)user_lock;
2634 }
2635#if KMP_USE_FUTEX
2636 else if ((__kmp_user_lock_kind == lk_futex) &&
2637 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2638 OMP_NEST_LOCK_T_SIZE)) {
2639 lck = (kmp_user_lock_p)user_lock;
2640 }
2641#endif
2642 else {
2643 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2644 }
2645
2646#if OMPT_SUPPORT && OMPT_OPTIONAL
2647 // This is the case, if called from omp_init_lock_with_hint:
2648 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2649 if (!codeptr)
2650 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2651 if (ompt_enabled.ompt_callback_lock_destroy) {
2652 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2653 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2654 }
2655#endif
2656
2657#if USE_ITT_BUILD
2658 __kmp_itt_lock_destroyed(lck);
2659#endif /* USE_ITT_BUILD */
2660
2661 DESTROY_NESTED_LOCK(lck);
2662
2663 if ((__kmp_user_lock_kind == lk_tas) &&
2664 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2665 OMP_NEST_LOCK_T_SIZE)) {
2666 ;
2667 }
2668#if KMP_USE_FUTEX
2669 else if ((__kmp_user_lock_kind == lk_futex) &&
2670 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2671 OMP_NEST_LOCK_T_SIZE)) {
2672 ;
2673 }
2674#endif
2675 else {
2676 __kmp_user_lock_free(user_lock, gtid, lck);
2677 }
2678#endif // KMP_USE_DYNAMIC_LOCK
2679} // __kmpc_destroy_nest_lock
2680
2681void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2682 KMP_COUNT_BLOCK(OMP_set_lock);
2683#if KMP_USE_DYNAMIC_LOCK
2684 int tag = KMP_EXTRACT_D_TAG(user_lock);
2685#if USE_ITT_BUILD
2686 __kmp_itt_lock_acquiring(
2687 (kmp_user_lock_p)
2688 user_lock); // itt function will get to the right lock object.
2689#endif
2690#if OMPT_SUPPORT && OMPT_OPTIONAL
2691 // This is the case, if called from omp_init_lock_with_hint:
2692 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2693 if (!codeptr)
2694 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2695 if (ompt_enabled.ompt_callback_mutex_acquire) {
2696 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2697 ompt_mutex_lock, omp_lock_hint_none,
2698 __ompt_get_mutex_impl_type(user_lock),
2699 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2700 }
2701#endif
2702#if KMP_USE_INLINED_TAS
2703 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2704 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2705 } else
2706#elif KMP_USE_INLINED_FUTEX
2707 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2708 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2709 } else
2710#endif
2711 {
2712 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2713 }
2714#if USE_ITT_BUILD
2715 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2716#endif
2717#if OMPT_SUPPORT && OMPT_OPTIONAL
2718 if (ompt_enabled.ompt_callback_mutex_acquired) {
2719 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2720 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2721 }
2722#endif
2723
2724#else // KMP_USE_DYNAMIC_LOCK
2725
2726 kmp_user_lock_p lck;
2727
2728 if ((__kmp_user_lock_kind == lk_tas) &&
2729 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2730 lck = (kmp_user_lock_p)user_lock;
2731 }
2732#if KMP_USE_FUTEX
2733 else if ((__kmp_user_lock_kind == lk_futex) &&
2734 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2735 lck = (kmp_user_lock_p)user_lock;
2736 }
2737#endif
2738 else {
2739 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2740 }
2741
2742#if USE_ITT_BUILD
2743 __kmp_itt_lock_acquiring(lck);
2744#endif /* USE_ITT_BUILD */
2745#if OMPT_SUPPORT && OMPT_OPTIONAL
2746 // This is the case, if called from omp_init_lock_with_hint:
2747 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748 if (!codeptr)
2749 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750 if (ompt_enabled.ompt_callback_mutex_acquire) {
2751 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2752 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2753 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2754 }
2755#endif
2756
2757 ACQUIRE_LOCK(lck, gtid);
2758
2759#if USE_ITT_BUILD
2760 __kmp_itt_lock_acquired(lck);
2761#endif /* USE_ITT_BUILD */
2762
2763#if OMPT_SUPPORT && OMPT_OPTIONAL
2764 if (ompt_enabled.ompt_callback_mutex_acquired) {
2765 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2766 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2767 }
2768#endif
2769
2770#endif // KMP_USE_DYNAMIC_LOCK
2771}
2772
2773void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2774#if KMP_USE_DYNAMIC_LOCK
2775
2776#if USE_ITT_BUILD
2777 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2778#endif
2779#if OMPT_SUPPORT && OMPT_OPTIONAL
2780 // This is the case, if called from omp_init_lock_with_hint:
2781 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2782 if (!codeptr)
2783 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2784 if (ompt_enabled.enabled) {
2785 if (ompt_enabled.ompt_callback_mutex_acquire) {
2786 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2787 ompt_mutex_nest_lock, omp_lock_hint_none,
2788 __ompt_get_mutex_impl_type(user_lock),
2789 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2790 }
2791 }
2792#endif
2793 int acquire_status =
2794 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2795 (void)acquire_status;
2796#if USE_ITT_BUILD
2797 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2798#endif
2799
2800#if OMPT_SUPPORT && OMPT_OPTIONAL
2801 if (ompt_enabled.enabled) {
2802 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2803 if (ompt_enabled.ompt_callback_mutex_acquired) {
2804 // lock_first
2805 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2806 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2807 codeptr);
2808 }
2809 } else {
2810 if (ompt_enabled.ompt_callback_nest_lock) {
2811 // lock_next
2812 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2813 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2814 }
2815 }
2816 }
2817#endif
2818
2819#else // KMP_USE_DYNAMIC_LOCK
2820 int acquire_status;
2821 kmp_user_lock_p lck;
2822
2823 if ((__kmp_user_lock_kind == lk_tas) &&
2824 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2825 OMP_NEST_LOCK_T_SIZE)) {
2826 lck = (kmp_user_lock_p)user_lock;
2827 }
2828#if KMP_USE_FUTEX
2829 else if ((__kmp_user_lock_kind == lk_futex) &&
2830 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2831 OMP_NEST_LOCK_T_SIZE)) {
2832 lck = (kmp_user_lock_p)user_lock;
2833 }
2834#endif
2835 else {
2836 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2837 }
2838
2839#if USE_ITT_BUILD
2840 __kmp_itt_lock_acquiring(lck);
2841#endif /* USE_ITT_BUILD */
2842#if OMPT_SUPPORT && OMPT_OPTIONAL
2843 // This is the case, if called from omp_init_lock_with_hint:
2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2845 if (!codeptr)
2846 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2847 if (ompt_enabled.enabled) {
2848 if (ompt_enabled.ompt_callback_mutex_acquire) {
2849 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2850 ompt_mutex_nest_lock, omp_lock_hint_none,
2851 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2852 codeptr);
2853 }
2854 }
2855#endif
2856
2857 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2858
2859#if USE_ITT_BUILD
2860 __kmp_itt_lock_acquired(lck);
2861#endif /* USE_ITT_BUILD */
2862
2863#if OMPT_SUPPORT && OMPT_OPTIONAL
2864 if (ompt_enabled.enabled) {
2865 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2866 if (ompt_enabled.ompt_callback_mutex_acquired) {
2867 // lock_first
2868 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2869 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2870 }
2871 } else {
2872 if (ompt_enabled.ompt_callback_nest_lock) {
2873 // lock_next
2874 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2875 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2876 }
2877 }
2878 }
2879#endif
2880
2881#endif // KMP_USE_DYNAMIC_LOCK
2882}
2883
2884void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2885#if KMP_USE_DYNAMIC_LOCK
2886
2887 int tag = KMP_EXTRACT_D_TAG(user_lock);
2888#if USE_ITT_BUILD
2889 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2890#endif
2891#if KMP_USE_INLINED_TAS
2892 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2893 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2894 } else
2895#elif KMP_USE_INLINED_FUTEX
2896 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2897 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2898 } else
2899#endif
2900 {
2901 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2902 }
2903
2904#if OMPT_SUPPORT && OMPT_OPTIONAL
2905 // This is the case, if called from omp_init_lock_with_hint:
2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907 if (!codeptr)
2908 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909 if (ompt_enabled.ompt_callback_mutex_released) {
2910 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2911 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2912 }
2913#endif
2914
2915#else // KMP_USE_DYNAMIC_LOCK
2916
2917 kmp_user_lock_p lck;
2918
2919 /* Can't use serial interval since not block structured */
2920 /* release the lock */
2921
2922 if ((__kmp_user_lock_kind == lk_tas) &&
2923 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2924#if KMP_OS_LINUX && \
2925 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2926// "fast" path implemented to fix customer performance issue
2927#if USE_ITT_BUILD
2928 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2929#endif /* USE_ITT_BUILD */
2930 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2931 KMP_MB();
2932
2933#if OMPT_SUPPORT && OMPT_OPTIONAL
2934 // This is the case, if called from omp_init_lock_with_hint:
2935 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2936 if (!codeptr)
2937 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2938 if (ompt_enabled.ompt_callback_mutex_released) {
2939 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2940 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2941 }
2942#endif
2943
2944 return;
2945#else
2946 lck = (kmp_user_lock_p)user_lock;
2947#endif
2948 }
2949#if KMP_USE_FUTEX
2950 else if ((__kmp_user_lock_kind == lk_futex) &&
2951 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2952 lck = (kmp_user_lock_p)user_lock;
2953 }
2954#endif
2955 else {
2956 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2957 }
2958
2959#if USE_ITT_BUILD
2960 __kmp_itt_lock_releasing(lck);
2961#endif /* USE_ITT_BUILD */
2962
2963 RELEASE_LOCK(lck, gtid);
2964
2965#if OMPT_SUPPORT && OMPT_OPTIONAL
2966 // This is the case, if called from omp_init_lock_with_hint:
2967 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2968 if (!codeptr)
2969 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2970 if (ompt_enabled.ompt_callback_mutex_released) {
2971 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2972 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2973 }
2974#endif
2975
2976#endif // KMP_USE_DYNAMIC_LOCK
2977}
2978
2979/* release the lock */
2980void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2981#if KMP_USE_DYNAMIC_LOCK
2982
2983#if USE_ITT_BUILD
2984 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2985#endif
2986 int release_status =
2987 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2988 (void)release_status;
2989
2990#if OMPT_SUPPORT && OMPT_OPTIONAL
2991 // This is the case, if called from omp_init_lock_with_hint:
2992 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2993 if (!codeptr)
2994 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2995 if (ompt_enabled.enabled) {
2996 if (release_status == KMP_LOCK_RELEASED) {
2997 if (ompt_enabled.ompt_callback_mutex_released) {
2998 // release_lock_last
2999 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3000 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3001 codeptr);
3002 }
3003 } else if (ompt_enabled.ompt_callback_nest_lock) {
3004 // release_lock_prev
3005 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3006 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3007 }
3008 }
3009#endif
3010
3011#else // KMP_USE_DYNAMIC_LOCK
3012
3013 kmp_user_lock_p lck;
3014
3015 /* Can't use serial interval since not block structured */
3016
3017 if ((__kmp_user_lock_kind == lk_tas) &&
3018 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3019 OMP_NEST_LOCK_T_SIZE)) {
3020#if KMP_OS_LINUX && \
3021 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3022 // "fast" path implemented to fix customer performance issue
3023 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3024#if USE_ITT_BUILD
3025 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3026#endif /* USE_ITT_BUILD */
3027
3028#if OMPT_SUPPORT && OMPT_OPTIONAL
3029 int release_status = KMP_LOCK_STILL_HELD;
3030#endif
3031
3032 if (--(tl->lk.depth_locked) == 0) {
3033 TCW_4(tl->lk.poll, 0);
3034#if OMPT_SUPPORT && OMPT_OPTIONAL
3035 release_status = KMP_LOCK_RELEASED;
3036#endif
3037 }
3038 KMP_MB();
3039
3040#if OMPT_SUPPORT && OMPT_OPTIONAL
3041 // This is the case, if called from omp_init_lock_with_hint:
3042 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3043 if (!codeptr)
3044 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3045 if (ompt_enabled.enabled) {
3046 if (release_status == KMP_LOCK_RELEASED) {
3047 if (ompt_enabled.ompt_callback_mutex_released) {
3048 // release_lock_last
3049 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3050 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3051 }
3052 } else if (ompt_enabled.ompt_callback_nest_lock) {
3053 // release_lock_previous
3054 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3055 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3056 }
3057 }
3058#endif
3059
3060 return;
3061#else
3062 lck = (kmp_user_lock_p)user_lock;
3063#endif
3064 }
3065#if KMP_USE_FUTEX
3066 else if ((__kmp_user_lock_kind == lk_futex) &&
3067 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3068 OMP_NEST_LOCK_T_SIZE)) {
3069 lck = (kmp_user_lock_p)user_lock;
3070 }
3071#endif
3072 else {
3073 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3074 }
3075
3076#if USE_ITT_BUILD
3077 __kmp_itt_lock_releasing(lck);
3078#endif /* USE_ITT_BUILD */
3079
3080 int release_status;
3081 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3082#if OMPT_SUPPORT && OMPT_OPTIONAL
3083 // This is the case, if called from omp_init_lock_with_hint:
3084 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3085 if (!codeptr)
3086 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3087 if (ompt_enabled.enabled) {
3088 if (release_status == KMP_LOCK_RELEASED) {
3089 if (ompt_enabled.ompt_callback_mutex_released) {
3090 // release_lock_last
3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3092 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3093 }
3094 } else if (ompt_enabled.ompt_callback_nest_lock) {
3095 // release_lock_previous
3096 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3097 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098 }
3099 }
3100#endif
3101
3102#endif // KMP_USE_DYNAMIC_LOCK
3103}
3104
3105/* try to acquire the lock */
3106int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3107 KMP_COUNT_BLOCK(OMP_test_lock);
3108
3109#if KMP_USE_DYNAMIC_LOCK
3110 int rc;
3111 int tag = KMP_EXTRACT_D_TAG(user_lock);
3112#if USE_ITT_BUILD
3113 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3114#endif
3115#if OMPT_SUPPORT && OMPT_OPTIONAL
3116 // This is the case, if called from omp_init_lock_with_hint:
3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118 if (!codeptr)
3119 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120 if (ompt_enabled.ompt_callback_mutex_acquire) {
3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3122 ompt_mutex_lock, omp_lock_hint_none,
3123 __ompt_get_mutex_impl_type(user_lock),
3124 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3125 }
3126#endif
3127#if KMP_USE_INLINED_TAS
3128 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3129 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3130 } else
3131#elif KMP_USE_INLINED_FUTEX
3132 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3133 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3134 } else
3135#endif
3136 {
3137 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3138 }
3139 if (rc) {
3140#if USE_ITT_BUILD
3141 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3142#endif
3143#if OMPT_SUPPORT && OMPT_OPTIONAL
3144 if (ompt_enabled.ompt_callback_mutex_acquired) {
3145 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3146 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3147 }
3148#endif
3149 return FTN_TRUE;
3150 } else {
3151#if USE_ITT_BUILD
3152 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3153#endif
3154 return FTN_FALSE;
3155 }
3156
3157#else // KMP_USE_DYNAMIC_LOCK
3158
3159 kmp_user_lock_p lck;
3160 int rc;
3161
3162 if ((__kmp_user_lock_kind == lk_tas) &&
3163 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3164 lck = (kmp_user_lock_p)user_lock;
3165 }
3166#if KMP_USE_FUTEX
3167 else if ((__kmp_user_lock_kind == lk_futex) &&
3168 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3169 lck = (kmp_user_lock_p)user_lock;
3170 }
3171#endif
3172 else {
3173 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3174 }
3175
3176#if USE_ITT_BUILD
3177 __kmp_itt_lock_acquiring(lck);
3178#endif /* USE_ITT_BUILD */
3179#if OMPT_SUPPORT && OMPT_OPTIONAL
3180 // This is the case, if called from omp_init_lock_with_hint:
3181 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3182 if (!codeptr)
3183 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3184 if (ompt_enabled.ompt_callback_mutex_acquire) {
3185 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3186 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3187 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3188 }
3189#endif
3190
3191 rc = TEST_LOCK(lck, gtid);
3192#if USE_ITT_BUILD
3193 if (rc) {
3194 __kmp_itt_lock_acquired(lck);
3195 } else {
3196 __kmp_itt_lock_cancelled(lck);
3197 }
3198#endif /* USE_ITT_BUILD */
3199#if OMPT_SUPPORT && OMPT_OPTIONAL
3200 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3201 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3202 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3203 }
3204#endif
3205
3206 return (rc ? FTN_TRUE : FTN_FALSE);
3207
3208 /* Can't use serial interval since not block structured */
3209
3210#endif // KMP_USE_DYNAMIC_LOCK
3211}
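// Illustrative usage (not part of the runtime): the non-blocking acquire
// pattern at the omp.h level that lands in __kmpc_test_lock above; the
// FTN_TRUE/FTN_FALSE results surface to the user as the nonzero/zero return
// of omp_test_lock.
//
//   #include <omp.h>
//
//   static omp_lock_t work_lock;        // assumed initialized elsewhere
//
//   void try_do_work(void) {
//     if (omp_test_lock(&work_lock)) {  // acquired without blocking
//       /* ... critical work ... */
//       omp_unset_lock(&work_lock);
//     } else {
//       /* lock busy: do other useful work instead of waiting */
//     }
//   }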
3212
3213/* try to acquire the lock */
3214int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3215#if KMP_USE_DYNAMIC_LOCK
3216 int rc;
3217#if USE_ITT_BUILD
3218 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3219#endif
3220#if OMPT_SUPPORT && OMPT_OPTIONAL
3221 // This is the case, if called from omp_init_lock_with_hint:
3222 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3223 if (!codeptr)
3224 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3225 if (ompt_enabled.ompt_callback_mutex_acquire) {
3226 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3227 ompt_mutex_nest_lock, omp_lock_hint_none,
3228 __ompt_get_mutex_impl_type(user_lock),
3229 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3230 }
3231#endif
3232 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3233#if USE_ITT_BUILD
3234 if (rc) {
3235 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3236 } else {
3237 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3238 }
3239#endif
3240#if OMPT_SUPPORT && OMPT_OPTIONAL
3241 if (ompt_enabled.enabled && rc) {
3242 if (rc == 1) {
3243 if (ompt_enabled.ompt_callback_mutex_acquired) {
3244 // lock_first
3245 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3246 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3247 codeptr);
3248 }
3249 } else {
3250 if (ompt_enabled.ompt_callback_nest_lock) {
3251 // lock_next
3252 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3253 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3254 }
3255 }
3256 }
3257#endif
3258 return rc;
3259
3260#else // KMP_USE_DYNAMIC_LOCK
3261
3262 kmp_user_lock_p lck;
3263 int rc;
3264
3265 if ((__kmp_user_lock_kind == lk_tas) &&
3266 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3267 OMP_NEST_LOCK_T_SIZE)) {
3268 lck = (kmp_user_lock_p)user_lock;
3269 }
3270#if KMP_USE_FUTEX
3271 else if ((__kmp_user_lock_kind == lk_futex) &&
3272 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3273 OMP_NEST_LOCK_T_SIZE)) {
3274 lck = (kmp_user_lock_p)user_lock;
3275 }
3276#endif
3277 else {
3278 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3279 }
3280
3281#if USE_ITT_BUILD
3282 __kmp_itt_lock_acquiring(lck);
3283#endif /* USE_ITT_BUILD */
3284
3285#if OMPT_SUPPORT && OMPT_OPTIONAL
3286 // This is the case, if called from omp_init_lock_with_hint:
3287 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3288 if (!codeptr)
3289 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3290 if (ompt_enabled.enabled &&
3291 ompt_enabled.ompt_callback_mutex_acquire) {
3292 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3293 ompt_mutex_nest_lock, omp_lock_hint_none,
3294 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3295 codeptr);
3296 }
3297#endif
3298
3299 rc = TEST_NESTED_LOCK(lck, gtid);
3300#if USE_ITT_BUILD
3301 if (rc) {
3302 __kmp_itt_lock_acquired(lck);
3303 } else {
3304 __kmp_itt_lock_cancelled(lck);
3305 }
3306#endif /* USE_ITT_BUILD */
3307#if OMPT_SUPPORT && OMPT_OPTIONAL
3308 if (ompt_enabled.enabled && rc) {
3309 if (rc == 1) {
3310 if (ompt_enabled.ompt_callback_mutex_acquired) {
3311 // lock_first
3312 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3313 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3314 }
3315 } else {
3316 if (ompt_enabled.ompt_callback_nest_lock) {
3317 // lock_next
3318 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3319 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3320 }
3321 }
3322 }
3323#endif
3324 return rc;
3325
3326 /* Can't use serial interval since not block structured */
3327
3328#endif // KMP_USE_DYNAMIC_LOCK
3329}
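// Illustrative usage (not part of the runtime): omp_test_nest_lock returns
// the new nesting depth (0 if the lock is owned by another thread), which is
// why the OMPT code above distinguishes rc == 1 (first acquisition) from a
// re-acquisition by the owning thread.
//
//   #include <omp.h>
//
//   static omp_nest_lock_t nlk;            // assumed initialized elsewhere
//
//   void reenter(void) {
//     if (omp_test_nest_lock(&nlk)) {      // depth becomes 1 for this thread
//       if (omp_test_nest_lock(&nlk)) {    // same owner: depth becomes 2
//         omp_unset_nest_lock(&nlk);
//       }
//       omp_unset_nest_lock(&nlk);
//     }
//   }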
3330
3331// Interface to fast scalable reduce methods routines
3332
3333// keep the selected method in a thread local structure for cross-function
3334// usage: will be used in __kmpc_end_reduce* functions;
3335// another solution: to re-determine the method one more time in
3336// __kmpc_end_reduce* functions (new prototype required then)
3337// AT: which solution is better?
3338#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3339 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3340
3341#define __KMP_GET_REDUCTION_METHOD(gtid) \
3342 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3343
3344// description of the packed_reduction_method variable: look at the macros in
3345// kmp.h
3346
3347// used in a critical section reduce block
3348static __forceinline void
3349__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3350 kmp_critical_name *crit) {
3351
3352 // this lock was visible to a customer and to the threading profile tool as a
3353 // serial overhead span (although it's used for an internal purpose only)
3354 // why was it visible in previous implementation?
3355 // should we keep it visible in new reduce block?
3356 kmp_user_lock_p lck;
3357
3358#if KMP_USE_DYNAMIC_LOCK
3359
3360 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3361 // Check if it is initialized.
3362 if (*lk == 0) {
3363 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3364 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3365 KMP_GET_D_TAG(__kmp_user_lock_seq));
3366 } else {
3367 __kmp_init_indirect_csptr(crit, loc, global_tid,
3368 KMP_GET_I_TAG(__kmp_user_lock_seq));
3369 }
3370 }
3371 // Branch for accessing the actual lock object and set operation. This
3372 // branching is inevitable since this lock initialization does not follow the
3373 // normal dispatch path (lock table is not used).
3374 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3375 lck = (kmp_user_lock_p)lk;
3376 KMP_DEBUG_ASSERT(lck != NULL);
3377 if (__kmp_env_consistency_check) {
3378 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3379 }
3380 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3381 } else {
3382 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3383 lck = ilk->lock;
3384 KMP_DEBUG_ASSERT(lck != NULL);
3385 if (__kmp_env_consistency_check) {
3386 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3387 }
3388 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3389 }
3390
3391#else // KMP_USE_DYNAMIC_LOCK
3392
3393 // We know that the fast reduction code is only emitted by Intel compilers
3394 // with 32 byte critical sections. If there isn't enough space, then we
3395 // have to use a pointer.
3396 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3397 lck = (kmp_user_lock_p)crit;
3398 } else {
3399 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3400 }
3401 KMP_DEBUG_ASSERT(lck != NULL);
3402
3403 if (__kmp_env_consistency_check)
3404 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3405
3406 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3407
3408#endif // KMP_USE_DYNAMIC_LOCK
3409}
3410
3411// used in a critical section reduce block
3412static __forceinline void
3413__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3414 kmp_critical_name *crit) {
3415
3416 kmp_user_lock_p lck;
3417
3418#if KMP_USE_DYNAMIC_LOCK
3419
3420 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3421 lck = (kmp_user_lock_p)crit;
3422 if (__kmp_env_consistency_check)
3423 __kmp_pop_sync(global_tid, ct_critical, loc);
3424 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3425 } else {
3426 kmp_indirect_lock_t *ilk =
3427 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3428 if (__kmp_env_consistency_check)
3429 __kmp_pop_sync(global_tid, ct_critical, loc);
3430 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3431 }
3432
3433#else // KMP_USE_DYNAMIC_LOCK
3434
3435 // We know that the fast reduction code is only emitted by Intel compilers
3436 // with 32 byte critical sections. If there isn't enough space, then we have
3437 // to use a pointer.
3438 if (__kmp_base_user_lock_size > 32) {
3439 lck = *((kmp_user_lock_p *)crit);
3440 KMP_ASSERT(lck != NULL);
3441 } else {
3442 lck = (kmp_user_lock_p)crit;
3443 }
3444
3445 if (__kmp_env_consistency_check)
3446 __kmp_pop_sync(global_tid, ct_critical, loc);
3447
3448 __kmp_release_user_lock_with_checks(lck, global_tid);
3449
3450#endif // KMP_USE_DYNAMIC_LOCK
3451} // __kmp_end_critical_section_reduce_block
3452
3453static __forceinline int
3454__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3455 int *task_state) {
3456 kmp_team_t *team;
3457
3458 // Check whether we are inside a teams construct.
3459 if (th->th.th_teams_microtask) {
3460 *team_p = team = th->th.th_team;
3461 if (team->t.t_level == th->th.th_teams_level) {
3462 // This is reduction at teams construct.
3463 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3464 // Let's swap teams temporarily for the reduction.
3465 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3466 th->th.th_team = team->t.t_parent;
3467 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3468 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3469 *task_state = th->th.th_task_state;
3470 th->th.th_task_state = 0;
3471
3472 return 1;
3473 }
3474 }
3475 return 0;
3476}
3477
3478static __forceinline void
3479__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3480 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3481 th->th.th_info.ds.ds_tid = 0;
3482 th->th.th_team = team;
3483 th->th.th_team_nproc = team->t.t_nproc;
3484 th->th.th_task_team = team->t.t_task_team[task_state];
3485 __kmp_type_convert(task_state, &(th->th.th_task_state));
3486}
3487
3488/* 2.a.i. Reduce Block without a terminating barrier */
3504kmp_int32
3505__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3506 size_t reduce_size, void *reduce_data,
3507 void (*reduce_func)(void *lhs_data, void *rhs_data),
3508 kmp_critical_name *lck) {
3509
3510 KMP_COUNT_BLOCK(REDUCE_nowait);
3511 int retval = 0;
3512 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3513 kmp_info_t *th;
3514 kmp_team_t *team;
3515 int teams_swapped = 0, task_state;
3516 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3517 __kmp_assert_valid_gtid(global_tid);
3518
3519 // why do we need this initialization here at all?
3520 // Reduction clause can not be used as a stand-alone directive.
3521
3522 // do not call __kmp_serial_initialize(), it will be called by
3523 // __kmp_parallel_initialize() if needed
3524 // possible detection of false-positive race by the threadchecker ???
3525 if (!TCR_4(__kmp_init_parallel))
3526 __kmp_parallel_initialize();
3527
3528 __kmp_resume_if_soft_paused();
3529
3530// check correctness of reduce block nesting
3531#if KMP_USE_DYNAMIC_LOCK
3532 if (__kmp_env_consistency_check)
3533 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3534#else
3535 if (__kmp_env_consistency_check)
3536 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3537#endif
3538
3539 th = __kmp_thread_from_gtid(global_tid);
3540 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3541
3542 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3543 // the value should be kept in a variable
3544 // the variable should be either a construct-specific or thread-specific
3545 // property, not a team specific property
3546 // (a thread can reach the next reduce block on the next construct, reduce
3547 // method may differ on the next construct)
3548 // an ident_t "loc" parameter could be used as a construct-specific property
3549 // (what if loc == 0?)
3550 // (if both construct-specific and team-specific variables were shared,
3551 // then unnecessary extra syncs would be needed)
3552 // a thread-specific variable is better regarding two issues above (next
3553 // construct and extra syncs)
3554 // a thread-specific "th_local.reduction_method" variable is used currently
3555 // each thread executes 'determine' and 'set' lines (no need to execute by one
3556 // thread, to avoid unnecessary extra syncs)
3557
3558 packed_reduction_method = __kmp_determine_reduction_method(
3559 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3560 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3561
3562 OMPT_REDUCTION_DECL(th, global_tid);
3563 if (packed_reduction_method == critical_reduce_block) {
3564
3565 OMPT_REDUCTION_BEGIN;
3566
3567 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3568 retval = 1;
3569
3570 } else if (packed_reduction_method == empty_reduce_block) {
3571
3572 OMPT_REDUCTION_BEGIN;
3573
3574 // usage: if team size == 1, no synchronization is required ( Intel
3575 // platforms only )
3576 retval = 1;
3577
3578 } else if (packed_reduction_method == atomic_reduce_block) {
3579
3580 retval = 2;
3581
3582 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3583 // won't be called by the code gen)
3584 // (it's not quite good, because the checking block has been closed by
3585 // this 'pop',
3586 // but atomic operation has not been executed yet, will be executed
3587 // slightly later, literally on next instruction)
3588 if (__kmp_env_consistency_check)
3589 __kmp_pop_sync(global_tid, ct_reduce, loc);
3590
3591 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3592 tree_reduce_block)) {
3593
3594// AT: performance issue: a real barrier here
3595// AT: (if primary thread is slow, other threads are blocked here waiting for
3596// the primary thread to come and release them)
3597// AT: (it's not what a customer might expect specifying NOWAIT clause)
3598// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3599// be confusing to a customer)
3600// AT: another implementation of *barrier_gather*nowait() (or some other design)
3601 // might go faster and be more in line with the sense of NOWAIT
3602// AT: TO DO: do epcc test and compare times
3603
3604// this barrier should be invisible to a customer and to the threading profile
3605// tool (it's neither a terminating barrier nor customer's code, it's
3606// used for an internal purpose)
3607#if OMPT_SUPPORT
3608 // JP: can this barrier potentially lead to task scheduling?
3609 // JP: as long as there is a barrier in the implementation, OMPT should and
3610 // will provide the barrier events
3611 // so we set-up the necessary frame/return addresses.
3612 ompt_frame_t *ompt_frame;
3613 if (ompt_enabled.enabled) {
3614 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3615 if (ompt_frame->enter_frame.ptr == NULL)
3616 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3617 }
3618 OMPT_STORE_RETURN_ADDRESS(global_tid);
3619#endif
3620#if USE_ITT_NOTIFY
3621 __kmp_threads[global_tid]->th.th_ident = loc;
3622#endif
3623 retval =
3624 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3625 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3626 retval = (retval != 0) ? (0) : (1);
3627#if OMPT_SUPPORT && OMPT_OPTIONAL
3628 if (ompt_enabled.enabled) {
3629 ompt_frame->enter_frame = ompt_data_none;
3630 }
3631#endif
3632
3633 // all other workers except primary thread should do this pop here
3634 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3635 if (__kmp_env_consistency_check) {
3636 if (retval == 0) {
3637 __kmp_pop_sync(global_tid, ct_reduce, loc);
3638 }
3639 }
3640
3641 } else {
3642
3643 // should never reach this block
3644 KMP_ASSERT(0); // "unexpected method"
3645 }
3646 if (teams_swapped) {
3647 __kmp_restore_swapped_teams(th, team, task_state);
3648 }
3649 KA_TRACE(
3650 10,
3651 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3652 global_tid, packed_reduction_method, retval));
3653
3654 return retval;
3655}
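// Minimal sketch (an assumption about typical lowering; variable names and
// the outlined reducer are illustrative) of how a compiler consumes the
// return value above for
//     #pragma omp parallel for reduction(+:sum) nowait
//
//   struct red_data { long sum; };
//   static void reduce_func(void *lhs, void *rhs) {
//     ((struct red_data *)lhs)->sum += ((struct red_data *)rhs)->sum;
//   }
//   ...
//   struct red_data rd = { local_sum };
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(rd), &rd, reduce_func,
//                                &crit_name)) {
//   case 1: // critical / empty / tree path: fold rd.sum into the shared value
//     shared_sum += rd.sum;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
//     break;
//   case 2: // atomic path: combine with an atomic update, no end call
//     __atomic_fetch_add(&shared_sum, rd.sum, __ATOMIC_RELAXED);
//     break;
//   default: // 0: a tree-reduction worker; its partial was already combined
//     break;
//   }
// where crit_name is a compiler-provided kmp_critical_name scratch area.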
3656
3665void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3666 kmp_critical_name *lck) {
3667
3668 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3669
3670 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3671 __kmp_assert_valid_gtid(global_tid);
3672
3673 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3674
3675 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3676
3677 if (packed_reduction_method == critical_reduce_block) {
3678
3679 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3680 OMPT_REDUCTION_END;
3681
3682 } else if (packed_reduction_method == empty_reduce_block) {
3683
3684 // usage: if team size == 1, no synchronization is required ( on Intel
3685 // platforms only )
3686
3687 OMPT_REDUCTION_END;
3688
3689 } else if (packed_reduction_method == atomic_reduce_block) {
3690
3691 // neither primary thread nor other workers should get here
3692 // (code gen does not generate this call in case 2: atomic reduce block)
3693 // actually it would be better to remove this else-if entirely;
3694 // after removal, this value would be checked by the 'else' branch and assert
3695
3696 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3697 tree_reduce_block)) {
3698
3699 // only primary thread gets here
3700 // OMPT: tree reduction is annotated in the barrier code
3701
3702 } else {
3703
3704 // should never reach this block
3705 KMP_ASSERT(0); // "unexpected method"
3706 }
3707
3708 if (__kmp_env_consistency_check)
3709 __kmp_pop_sync(global_tid, ct_reduce, loc);
3710
3711 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3712 global_tid, packed_reduction_method));
3713
3714 return;
3715}
3716
3717/* 2.a.ii. Reduce Block with a terminating barrier */
3718
3734kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3735 size_t reduce_size, void *reduce_data,
3736 void (*reduce_func)(void *lhs_data, void *rhs_data),
3737 kmp_critical_name *lck) {
3738 KMP_COUNT_BLOCK(REDUCE_wait);
3739 int retval = 0;
3740 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3741 kmp_info_t *th;
3742 kmp_team_t *team;
3743 int teams_swapped = 0, task_state;
3744
3745 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3746 __kmp_assert_valid_gtid(global_tid);
3747
3748 // why do we need this initialization here at all?
3749 // Reduction clause can not be a stand-alone directive.
3750
3751 // do not call __kmp_serial_initialize(), it will be called by
3752 // __kmp_parallel_initialize() if needed
3753 // possible detection of false-positive race by the threadchecker ???
3754 if (!TCR_4(__kmp_init_parallel))
3755 __kmp_parallel_initialize();
3756
3757 __kmp_resume_if_soft_paused();
3758
3759// check correctness of reduce block nesting
3760#if KMP_USE_DYNAMIC_LOCK
3761 if (__kmp_env_consistency_check)
3762 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3763#else
3764 if (__kmp_env_consistency_check)
3765 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3766#endif
3767
3768 th = __kmp_thread_from_gtid(global_tid);
3769 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3770
3771 packed_reduction_method = __kmp_determine_reduction_method(
3772 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3773 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3774
3775 OMPT_REDUCTION_DECL(th, global_tid);
3776
3777 if (packed_reduction_method == critical_reduce_block) {
3778
3779 OMPT_REDUCTION_BEGIN;
3780 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3781 retval = 1;
3782
3783 } else if (packed_reduction_method == empty_reduce_block) {
3784
3785 OMPT_REDUCTION_BEGIN;
3786 // usage: if team size == 1, no synchronization is required ( Intel
3787 // platforms only )
3788 retval = 1;
3789
3790 } else if (packed_reduction_method == atomic_reduce_block) {
3791
3792 retval = 2;
3793
3794 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3795 tree_reduce_block)) {
3796
3797// case tree_reduce_block:
3798// this barrier should be visible to a customer and to the threading profile
3799// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3800#if OMPT_SUPPORT
3801 ompt_frame_t *ompt_frame;
3802 if (ompt_enabled.enabled) {
3803 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3804 if (ompt_frame->enter_frame.ptr == NULL)
3805 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3806 }
3807 OMPT_STORE_RETURN_ADDRESS(global_tid);
3808#endif
3809#if USE_ITT_NOTIFY
3810 __kmp_threads[global_tid]->th.th_ident =
3811 loc; // needed for correct notification of frames
3812#endif
3813 retval =
3814 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3815 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3816 retval = (retval != 0) ? (0) : (1);
3817#if OMPT_SUPPORT && OMPT_OPTIONAL
3818 if (ompt_enabled.enabled) {
3819 ompt_frame->enter_frame = ompt_data_none;
3820 }
3821#endif
3822
3823 // all workers other than the primary thread should do this pop here
3824 // (only the primary thread will enter __kmpc_end_reduce())
3825 if (__kmp_env_consistency_check) {
3826 if (retval == 0) { // 0: all other workers; 1: primary thread
3827 __kmp_pop_sync(global_tid, ct_reduce, loc);
3828 }
3829 }
3830
3831 } else {
3832
3833 // should never reach this block
3834 KMP_ASSERT(0); // "unexpected method"
3835 }
3836 if (teams_swapped) {
3837 __kmp_restore_swapped_teams(th, team, task_state);
3838 }
3839
3840 KA_TRACE(10,
3841 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3842 global_tid, packed_reduction_method, retval));
3843 return retval;
3844}
3845
3856void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3857 kmp_critical_name *lck) {
3858
3859 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3860 kmp_info_t *th;
3861 kmp_team_t *team;
3862 int teams_swapped = 0, task_state;
3863
3864 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3865 __kmp_assert_valid_gtid(global_tid);
3866
3867 th = __kmp_thread_from_gtid(global_tid);
3868 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3869
3870 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3871
3872 // this barrier should be visible to a customer and to the threading profile
3873 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3874 OMPT_REDUCTION_DECL(th, global_tid);
3875
3876 if (packed_reduction_method == critical_reduce_block) {
3877 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3878
3879 OMPT_REDUCTION_END;
3880
3881// TODO: implicit barrier: should be exposed
3882#if OMPT_SUPPORT
3883 ompt_frame_t *ompt_frame;
3884 if (ompt_enabled.enabled) {
3885 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3886 if (ompt_frame->enter_frame.ptr == NULL)
3887 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3888 }
3889 OMPT_STORE_RETURN_ADDRESS(global_tid);
3890#endif
3891#if USE_ITT_NOTIFY
3892 __kmp_threads[global_tid]->th.th_ident = loc;
3893#endif
3894 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3895#if OMPT_SUPPORT && OMPT_OPTIONAL
3896 if (ompt_enabled.enabled) {
3897 ompt_frame->enter_frame = ompt_data_none;
3898 }
3899#endif
3900
3901 } else if (packed_reduction_method == empty_reduce_block) {
3902
3903 OMPT_REDUCTION_END;
3904
3905// usage: if team size == 1, no synchronization is required (Intel platforms only)
3906
3907// TODO: implicit barrier: should be exposed
3908#if OMPT_SUPPORT
3909 ompt_frame_t *ompt_frame;
3910 if (ompt_enabled.enabled) {
3911 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3912 if (ompt_frame->enter_frame.ptr == NULL)
3913 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3914 }
3915 OMPT_STORE_RETURN_ADDRESS(global_tid);
3916#endif
3917#if USE_ITT_NOTIFY
3918 __kmp_threads[global_tid]->th.th_ident = loc;
3919#endif
3920 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3921#if OMPT_SUPPORT && OMPT_OPTIONAL
3922 if (ompt_enabled.enabled) {
3923 ompt_frame->enter_frame = ompt_data_none;
3924 }
3925#endif
3926
3927 } else if (packed_reduction_method == atomic_reduce_block) {
3928
3929#if OMPT_SUPPORT
3930 ompt_frame_t *ompt_frame;
3931 if (ompt_enabled.enabled) {
3932 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3933 if (ompt_frame->enter_frame.ptr == NULL)
3934 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3935 }
3936 OMPT_STORE_RETURN_ADDRESS(global_tid);
3937#endif
3938// TODO: implicit barrier: should be exposed
3939#if USE_ITT_NOTIFY
3940 __kmp_threads[global_tid]->th.th_ident = loc;
3941#endif
3942 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3943#if OMPT_SUPPORT && OMPT_OPTIONAL
3944 if (ompt_enabled.enabled) {
3945 ompt_frame->enter_frame = ompt_data_none;
3946 }
3947#endif
3948
3949 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3950 tree_reduce_block)) {
3951
3952 // only primary thread executes here (primary releases all other workers)
3953 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3954 global_tid);
3955
3956 } else {
3957
3958 // should never reach this block
3959 KMP_ASSERT(0); // "unexpected method"
3960 }
3961 if (teams_swapped) {
3962 __kmp_restore_swapped_teams(th, team, task_state);
3963 }
3964
3965 if (__kmp_env_consistency_check)
3966 __kmp_pop_sync(global_tid, ct_reduce, loc);
3967
3968 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3969 global_tid, packed_reduction_method));
3970
3971 return;
3972}
3973
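// Illustrative sketch only (not part of the runtime; the helper names are
// hypothetical): roughly how a compiler could lower
//   #pragma omp for reduction(+ : sum)
// (without nowait) onto __kmpc_reduce()/__kmpc_end_reduce(). Real code
// generation differs between compilers and versions; the atomic case is shown
// with a plain __atomic builtin instead of the __kmpc_atomic_* entry points a
// compiler would normally use.
static kmp_critical_name example_reduce_lock; // zero-initialized lock storage
static void example_reduce_sum(void *lhs, void *rhs) {
  *(kmp_int32 *)lhs += *(kmp_int32 *)rhs; // combine two partial sums
}
static void example_reduce_epilogue(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 *sum, kmp_int32 local_sum) {
  switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(kmp_int32),
                        &local_sum, example_reduce_sum,
                        &example_reduce_lock)) {
  case 1: // combine under the critical section / tree chosen by the runtime
    *sum += local_sum;
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock);
    break;
  case 2: // runtime asked for an atomic combination of the partial result
    __atomic_fetch_add(sum, local_sum, __ATOMIC_ACQ_REL);
    __kmpc_end_reduce(loc, gtid, &example_reduce_lock); // terminating barrier
    break;
  default: // 0: nothing to do on this thread
    break;
  }
}
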
3974#undef __KMP_GET_REDUCTION_METHOD
3975#undef __KMP_SET_REDUCTION_METHOD
3976
3977/* end of interface to fast scalable reduce routines */
3978
3979kmp_uint64 __kmpc_get_taskid() {
3980
3981 kmp_int32 gtid;
3982 kmp_info_t *thread;
3983
3984 gtid = __kmp_get_gtid();
3985 if (gtid < 0) {
3986 return 0;
3987 }
3988 thread = __kmp_thread_from_gtid(gtid);
3989 return thread->th.th_current_task->td_task_id;
3990
3991} // __kmpc_get_taskid
3992
3993kmp_uint64 __kmpc_get_parent_taskid() {
3994
3995 kmp_int32 gtid;
3996 kmp_info_t *thread;
3997 kmp_taskdata_t *parent_task;
3998
3999 gtid = __kmp_get_gtid();
4000 if (gtid < 0) {
4001 return 0;
4002 }
4003 thread = __kmp_thread_from_gtid(gtid);
4004 parent_task = thread->th.th_current_task->td_parent;
4005 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4006
4007} // __kmpc_get_parent_taskid
4008
4020void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4021 const struct kmp_dim *dims) {
4022 __kmp_assert_valid_gtid(gtid);
4023 int j, idx;
4024 kmp_int64 last, trace_count;
4025 kmp_info_t *th = __kmp_threads[gtid];
4026 kmp_team_t *team = th->th.th_team;
4027 kmp_uint32 *flags;
4028 kmp_disp_t *pr_buf = th->th.th_dispatch;
4029 dispatch_shared_info_t *sh_buf;
4030
4031 KA_TRACE(
4032 20,
4033 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4034 gtid, num_dims, !team->t.t_serialized));
4035 KMP_DEBUG_ASSERT(dims != NULL);
4036 KMP_DEBUG_ASSERT(num_dims > 0);
4037
4038 if (team->t.t_serialized) {
4039 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4040 return; // no dependencies if team is serialized
4041 }
4042 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4043 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4044 // the next loop
4045 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4046
4047 // Save bounds info into allocated private buffer
4048 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4049 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4050 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4051 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4052 pr_buf->th_doacross_info[0] =
4053 (kmp_int64)num_dims; // first element is number of dimensions
4054 // Save also address of num_done in order to access it later without knowing
4055 // the buffer index
4056 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4057 pr_buf->th_doacross_info[2] = dims[0].lo;
4058 pr_buf->th_doacross_info[3] = dims[0].up;
4059 pr_buf->th_doacross_info[4] = dims[0].st;
4060 last = 5;
4061 for (j = 1; j < num_dims; ++j) {
4062 kmp_int64
4063 range_length; // keeps the range of every dimension except the first, dims[0]
4064 if (dims[j].st == 1) { // most common case
4065 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4066 range_length = dims[j].up - dims[j].lo + 1;
4067 } else {
4068 if (dims[j].st > 0) {
4069 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4070 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4071 } else { // negative increment
4072 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4073 range_length =
4074 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4075 }
4076 }
4077 pr_buf->th_doacross_info[last++] = range_length;
4078 pr_buf->th_doacross_info[last++] = dims[j].lo;
4079 pr_buf->th_doacross_info[last++] = dims[j].up;
4080 pr_buf->th_doacross_info[last++] = dims[j].st;
4081 }
4082
4083 // Compute total trip count.
4084 // Start with range of dims[0] which we don't need to keep in the buffer.
4085 if (dims[0].st == 1) { // most common case
4086 trace_count = dims[0].up - dims[0].lo + 1;
4087 } else if (dims[0].st > 0) {
4088 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4089 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4090 } else { // negative increment
4091 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4092 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4093 }
4094 for (j = 1; j < num_dims; ++j) {
4095 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4096 }
4097 KMP_DEBUG_ASSERT(trace_count > 0);
4098
4099 // Check whether the shared buffer is still occupied by a previous loop
4100 // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4101 if (idx != sh_buf->doacross_buf_idx) {
4102 // Shared buffer is occupied, wait for it to be free
4103 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4104 __kmp_eq_4, NULL);
4105 }
4106#if KMP_32_BIT_ARCH
4107 // Check if we are the first thread. After the CAS the first thread gets 0,
4108 // others get 1 if initialization is in progress, allocated pointer otherwise.
4109 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4110 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4111 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4112#else
4113 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4114 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4115#endif
4116 if (flags == NULL) {
4117 // we are the first thread, allocate the array of flags
4118 size_t size =
4119 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4120 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4121 KMP_MB();
4122 sh_buf->doacross_flags = flags;
4123 } else if (flags == (kmp_uint32 *)1) {
4124#if KMP_32_BIT_ARCH
4125 // initialization is still in progress, need to wait
4126 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4127#else
4128 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4129#endif
4130 KMP_YIELD(TRUE);
4131 KMP_MB();
4132 } else {
4133 KMP_MB();
4134 }
4135 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4136 pr_buf->th_doacross_flags =
4137 sh_buf->doacross_flags; // save private copy in order to not
4138 // touch shared buffer on each iteration
4139 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4140}
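
// Illustrative sketch only (hypothetical helper; no real compiler emits
// exactly this): how a doubly nested "#pragma omp for ordered(2)" loop, e.g.
//   for (i = 0; i < N; ++i) for (j = 0; j < M; ++j) ...,
// could describe its bounds to __kmpc_doacross_init() before dispatching work.
static void example_doacross_setup(ident_t *loc, int gtid, kmp_int64 N,
                                   kmp_int64 M) {
  struct kmp_dim dims[2];
  dims[0].lo = 0; dims[0].up = N - 1; dims[0].st = 1; // outer loop bounds
  dims[1].lo = 0; dims[1].up = M - 1; dims[1].st = 1; // inner loop bounds
  __kmpc_doacross_init(loc, gtid, 2, dims);
  // After the call, th_doacross_info holds { 2, &doacross_num_done,
  //   lo0, up0, st0,  range1, lo1, up1, st1 } as packed above.
}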
4141
4142void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4143 __kmp_assert_valid_gtid(gtid);
4144 kmp_int64 shft;
4145 size_t num_dims, i;
4146 kmp_uint32 flag;
4147 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4148 kmp_info_t *th = __kmp_threads[gtid];
4149 kmp_team_t *team = th->th.th_team;
4150 kmp_disp_t *pr_buf;
4151 kmp_int64 lo, up, st;
4152
4153 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4154 if (team->t.t_serialized) {
4155 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4156 return; // no dependencies if team is serialized
4157 }
4158
4159 // calculate sequential iteration number and check out-of-bounds condition
4160 pr_buf = th->th.th_dispatch;
4161 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4162 num_dims = (size_t)pr_buf->th_doacross_info[0];
4163 lo = pr_buf->th_doacross_info[2];
4164 up = pr_buf->th_doacross_info[3];
4165 st = pr_buf->th_doacross_info[4];
4166#if OMPT_SUPPORT && OMPT_OPTIONAL
4167 ompt_dependence_t deps[num_dims];
4168#endif
4169 if (st == 1) { // most common case
4170 if (vec[0] < lo || vec[0] > up) {
4171 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4172 "bounds [%lld,%lld]\n",
4173 gtid, vec[0], lo, up));
4174 return;
4175 }
4176 iter_number = vec[0] - lo;
4177 } else if (st > 0) {
4178 if (vec[0] < lo || vec[0] > up) {
4179 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4180 "bounds [%lld,%lld]\n",
4181 gtid, vec[0], lo, up));
4182 return;
4183 }
4184 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4185 } else { // negative increment
4186 if (vec[0] > lo || vec[0] < up) {
4187 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4188 "bounds [%lld,%lld]\n",
4189 gtid, vec[0], lo, up));
4190 return;
4191 }
4192 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4193 }
4194#if OMPT_SUPPORT && OMPT_OPTIONAL
4195 deps[0].variable.value = iter_number;
4196 deps[0].dependence_type = ompt_dependence_type_sink;
4197#endif
4198 for (i = 1; i < num_dims; ++i) {
4199 kmp_int64 iter, ln;
4200 size_t j = i * 4;
4201 ln = pr_buf->th_doacross_info[j + 1];
4202 lo = pr_buf->th_doacross_info[j + 2];
4203 up = pr_buf->th_doacross_info[j + 3];
4204 st = pr_buf->th_doacross_info[j + 4];
4205 if (st == 1) {
4206 if (vec[i] < lo || vec[i] > up) {
4207 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4208 "bounds [%lld,%lld]\n",
4209 gtid, vec[i], lo, up));
4210 return;
4211 }
4212 iter = vec[i] - lo;
4213 } else if (st > 0) {
4214 if (vec[i] < lo || vec[i] > up) {
4215 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4216 "bounds [%lld,%lld]\n",
4217 gtid, vec[i], lo, up));
4218 return;
4219 }
4220 iter = (kmp_uint64)(vec[i] - lo) / st;
4221 } else { // st < 0
4222 if (vec[i] > lo || vec[i] < up) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4224 "bounds [%lld,%lld]\n",
4225 gtid, vec[i], lo, up));
4226 return;
4227 }
4228 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4229 }
4230 iter_number = iter + ln * iter_number;
4231#if OMPT_SUPPORT && OMPT_OPTIONAL
4232 deps[i].variable.value = iter;
4233 deps[i].dependence_type = ompt_dependence_type_sink;
4234#endif
4235 }
4236 shft = iter_number % 32; // use 32-bit granularity
4237 iter_number >>= 5; // divided by 32
4238 flag = 1 << shft;
4239 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4240 KMP_YIELD(TRUE);
4241 }
4242 KMP_MB();
4243#if OMPT_SUPPORT && OMPT_OPTIONAL
4244 if (ompt_enabled.ompt_callback_dependences) {
4245 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4246 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4247 }
4248#endif
4249 KA_TRACE(20,
4250 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4251 gtid, (iter_number << 5) + shft));
4252}
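
// Illustrative sketch only (hypothetical helper): the flattening performed
// above, simplified to unit strides. An iteration vector is folded into a
// single iteration number using the saved range lengths, and that number then
// selects bit (iter % 32) of word (iter / 32) in the shared flags array.
static kmp_int64 example_flatten_iteration(const kmp_int64 *info,
                                           const kmp_int64 *vec,
                                           size_t num_dims) {
  kmp_int64 iter_number = vec[0] - info[2]; // dimension 0: (vec[0] - lo0) / st0
  for (size_t i = 1; i < num_dims; ++i) {
    size_t j = i * 4;
    kmp_int64 ln = info[j + 1];            // range length of dimension i
    kmp_int64 iter = vec[i] - info[j + 2]; // (vec[i] - lo_i) / st_i with st = 1
    iter_number = iter + ln * iter_number; // row-major flattening
  }
  return iter_number;
}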
4253
4254void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4255 __kmp_assert_valid_gtid(gtid);
4256 kmp_int64 shft;
4257 size_t num_dims, i;
4258 kmp_uint32 flag;
4259 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4260 kmp_info_t *th = __kmp_threads[gtid];
4261 kmp_team_t *team = th->th.th_team;
4262 kmp_disp_t *pr_buf;
4263 kmp_int64 lo, st;
4264
4265 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4266 if (team->t.t_serialized) {
4267 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4268 return; // no dependencies if team is serialized
4269 }
4270
4271 // calculate sequential iteration number (same as in "wait" but no
4272 // out-of-bounds checks)
4273 pr_buf = th->th.th_dispatch;
4274 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4275 num_dims = (size_t)pr_buf->th_doacross_info[0];
4276 lo = pr_buf->th_doacross_info[2];
4277 st = pr_buf->th_doacross_info[4];
4278#if OMPT_SUPPORT && OMPT_OPTIONAL
4279 ompt_dependence_t deps[num_dims];
4280#endif
4281 if (st == 1) { // most common case
4282 iter_number = vec[0] - lo;
4283 } else if (st > 0) {
4284 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4285 } else { // negative increment
4286 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4287 }
4288#if OMPT_SUPPORT && OMPT_OPTIONAL
4289 deps[0].variable.value = iter_number;
4290 deps[0].dependence_type = ompt_dependence_type_source;
4291#endif
4292 for (i = 1; i < num_dims; ++i) {
4293 kmp_int64 iter, ln;
4294 size_t j = i * 4;
4295 ln = pr_buf->th_doacross_info[j + 1];
4296 lo = pr_buf->th_doacross_info[j + 2];
4297 st = pr_buf->th_doacross_info[j + 4];
4298 if (st == 1) {
4299 iter = vec[i] - lo;
4300 } else if (st > 0) {
4301 iter = (kmp_uint64)(vec[i] - lo) / st;
4302 } else { // st < 0
4303 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4304 }
4305 iter_number = iter + ln * iter_number;
4306#if OMPT_SUPPORT && OMPT_OPTIONAL
4307 deps[i].variable.value = iter;
4308 deps[i].dependence_type = ompt_dependence_type_source;
4309#endif
4310 }
4311#if OMPT_SUPPORT && OMPT_OPTIONAL
4312 if (ompt_enabled.ompt_callback_dependences) {
4313 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4314 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4315 }
4316#endif
4317 shft = iter_number % 32; // use 32-bit granularity
4318 iter_number >>= 5; // divided by 32
4319 flag = 1 << shft;
4320 KMP_MB();
4321 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4322 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4323 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4324 (iter_number << 5) + shft));
4325}
4326
4327void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4328 __kmp_assert_valid_gtid(gtid);
4329 kmp_int32 num_done;
4330 kmp_info_t *th = __kmp_threads[gtid];
4331 kmp_team_t *team = th->th.th_team;
4332 kmp_disp_t *pr_buf = th->th.th_dispatch;
4333
4334 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4335 if (team->t.t_serialized) {
4336 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4337 return; // nothing to do
4338 }
4339 num_done =
4340 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4341 if (num_done == th->th.th_team_nproc) {
4342 // we are the last thread, need to free shared resources
4343 int idx = pr_buf->th_doacross_buf_idx - 1;
4344 dispatch_shared_info_t *sh_buf =
4345 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4346 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4347 (kmp_int64)&sh_buf->doacross_num_done);
4348 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4349 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4350 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4351 sh_buf->doacross_flags = NULL;
4352 sh_buf->doacross_num_done = 0;
4353 sh_buf->doacross_buf_idx +=
4354 __kmp_dispatch_num_buffers; // free buffer for future re-use
4355 }
4356 // free private resources (need to keep buffer index forever)
4357 pr_buf->th_doacross_flags = NULL;
4358 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4359 pr_buf->th_doacross_info = NULL;
4360 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4361}
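
// Illustrative sketch only (hypothetical and simplified to one dimension):
// the per-iteration calls a compiler could emit for
//   #pragma omp for ordered(1)
//   for (i = 1; i < N; ++i) {
//     #pragma omp ordered depend(sink : i - 1)
//     ...                       // consume the result of iteration i - 1
//     #pragma omp ordered depend(source)
//   }
// __kmpc_doacross_init() runs once before the loop body is dispatched and
// __kmpc_doacross_fini() once after it on every thread of the team.
static void example_doacross_iteration(ident_t *loc, int gtid, kmp_int64 i) {
  kmp_int64 sink_vec[1] = {i - 1};
  __kmpc_doacross_wait(loc, gtid, sink_vec); // block until i - 1 was posted
  /* ... loop body that consumes iteration i - 1 ... */
  kmp_int64 source_vec[1] = {i};
  __kmpc_doacross_post(loc, gtid, source_vec); // mark iteration i as done
}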
4362
4363/* OpenMP 5.1 Memory Management routines */
4364void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4365 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4366}
4367
4368void *omp_aligned_alloc(size_t align, size_t size,
4369 omp_allocator_handle_t allocator) {
4370 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4371}
4372
4373void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4374 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4375}
4376
4377void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4378 omp_allocator_handle_t allocator) {
4379 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4380}
4381
4382void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4383 omp_allocator_handle_t free_allocator) {
4384 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4385 free_allocator);
4386}
4387
4388void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4389 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4390}
4391/* end of OpenMP 5.1 Memory Management routines */
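
// Illustrative usage sketch only (user code, not runtime code):
// omp_default_mem_alloc is one of the predefined OpenMP allocators, and the
// same allocator handle must be passed back to omp_free().
static double *example_alloc_vector(size_t n) {
  // request 64-byte alignment, e.g. for vectorized access
  double *v = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                          omp_default_mem_alloc);
  return v; // caller releases it with omp_free(v, omp_default_mem_alloc)
}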
4392
4393int __kmpc_get_target_offload(void) {
4394 if (!__kmp_init_serial) {
4395 __kmp_serial_initialize();
4396 }
4397 return __kmp_target_offload;
4398}
4399
4400int __kmpc_pause_resource(kmp_pause_status_t level) {
4401 if (!__kmp_init_serial) {
4402 return 1; // Can't pause if runtime is not initialized
4403 }
4404 return __kmp_pause_resource(level);
4405}
4406
4407void __kmpc_error(ident_t *loc, int severity, const char *message) {
4408 if (!__kmp_init_serial)
4409 __kmp_serial_initialize();
4410
4411 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4412
4413#if OMPT_SUPPORT
4414 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4415 ompt_callbacks.ompt_callback(ompt_callback_error)(
4416 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4417 OMPT_GET_RETURN_ADDRESS(0));
4418 }
4419#endif // OMPT_SUPPORT
4420
4421 char *src_loc;
4422 if (loc && loc->psource) {
4423 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4424 src_loc =
4425 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4426 __kmp_str_loc_free(&str_loc);
4427 } else {
4428 src_loc = __kmp_str_format("unknown");
4429 }
4430
4431 if (severity == severity_warning)
4432 KMP_WARNING(UserDirectedWarning, src_loc, message);
4433 else
4434 KMP_FATAL(UserDirectedError, src_loc, message);
4435
4436 __kmp_str_free(&src_loc);
4437}
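
// Illustrative sketch only: this entry point backs the OpenMP 5.1 "error"
// directive; a compiler could lower
//   #pragma omp error at(execution) severity(warning) message("check input")
// into roughly the following call (the helper is hypothetical; loc would carry
// the directive's source position).
static void example_error_lowering(ident_t *loc) {
  __kmpc_error(loc, severity_warning, "check input");
}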
4438
4439// Mark begin of scope directive.
4440void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4441// reserved is for extension of scope directive and not used.
4442#if OMPT_SUPPORT && OMPT_OPTIONAL
4443 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4444 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4445 int tid = __kmp_tid_from_gtid(gtid);
4446 ompt_callbacks.ompt_callback(ompt_callback_work)(
4447 ompt_work_scope, ompt_scope_begin,
4448 &(team->t.ompt_team_info.parallel_data),
4449 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4450 OMPT_GET_RETURN_ADDRESS(0));
4451 }
4452#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4453}
4454
4455// Mark end of scope directive
4456void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4457// reserved is for extension of scope directive and not used.
4458#if OMPT_SUPPORT && OMPT_OPTIONAL
4459 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4460 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4461 int tid = __kmp_tid_from_gtid(gtid);
4462 ompt_callbacks.ompt_callback(ompt_callback_work)(
4463 ompt_work_scope, ompt_scope_end,
4464 &(team->t.ompt_team_info.parallel_data),
4465 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4466 OMPT_GET_RETURN_ADDRESS(0));
4467 }
4468#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4469}
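
// Illustrative sketch only (hypothetical helper): a compiler supporting
// "#pragma omp scope" can bracket the structured block with the two entry
// points above so that OMPT work callbacks are dispatched; the reserved
// argument is currently unused and passed as NULL.
static void example_scope_lowering(ident_t *loc, kmp_int32 gtid) {
  __kmpc_scope(loc, gtid, NULL);     // ompt_scope_begin for ompt_work_scope
  /* ... structured block of the scope construct ... */
  __kmpc_end_scope(loc, gtid, NULL); // ompt_scope_end
}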
4470
4471#ifdef KMP_USE_VERSION_SYMBOLS
4472// For GOMP compatibility there are two versions of each omp_* API.
4473// One is the plain C symbol and one is the Fortran symbol with an appended
4474// underscore. When we implement a specific ompc_* version of an omp_*
4475// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4476// instead of the Fortran versions in kmp_ftn_entry.h
4477extern "C" {
4478// Have to undef these from omp.h so they aren't translated into
4479// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4480#ifdef omp_set_affinity_format
4481#undef omp_set_affinity_format
4482#endif
4483#ifdef omp_get_affinity_format
4484#undef omp_get_affinity_format
4485#endif
4486#ifdef omp_display_affinity
4487#undef omp_display_affinity
4488#endif
4489#ifdef omp_capture_affinity
4490#undef omp_capture_affinity
4491#endif
4492KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4493 "OMP_5.0");
4494KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4495 "OMP_5.0");
4496KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4497 "OMP_5.0");
4498KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4499 "OMP_5.0");
4500} // extern "C"
4501#endif