//===------------ sync.cu - NVPTX OpenMP synchronizations -------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains all of the synchronization routines.
//
//===----------------------------------------------------------------------===//

#include "omptarget-nvptx.h"
#include "target_impl.h"

////////////////////////////////////////////////////////////////////////////////
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////

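// On the device, ordered regions are not enforced: these entry points only
// emit a debug message and otherwise do nothing.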
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_ordered\n");
}

EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}

////////////////////////////////////////////////////////////////////////////////
// KMP Barriers
////////////////////////////////////////////////////////////////////////////////

// A team is a block: we can use the CUDA native synchronization mechanism.
// FIXME: what if not all threads (warps) participate in the barrier?
// We may need to implement it differently.

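// Cancellation is not supported by the device runtime: this entry point
// performs a regular barrier and always reports that no cancellation was
// requested (return value 0).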
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
  __kmpc_barrier(loc_ref, tid);
  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
  return 0;
}

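// Full barrier dispatch.  With an uninitialized runtime we must be in SPMD
// mode, so the simple SPMD barrier suffices.  Otherwise, SPMD mode
// synchronizes all threads in the block, while generic mode uses a named
// barrier over the active OpenMP threads, rounded up to a multiple of
// WARPSIZE.  A single-threaded team only needs a memory flush.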
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  if (checkRuntimeUninitialized(loc_ref)) {
    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
    int numberOfActiveOMPThreads =
        GetNumberOfOmpThreads(checkSPMDMode(loc_ref));
    if (numberOfActiveOMPThreads > 1) {
      if (checkSPMDMode(loc_ref)) {
        __kmpc_barrier_simple_spmd(loc_ref, tid);
      } else {
        // The #threads parameter must be rounded up to the WARPSIZE.
        int threads =
            WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);

        PRINT(LD_SYNC,
              "call kmpc_barrier with %d omp threads, sync parameter %d\n",
              (int)numberOfActiveOMPThreads, (int)threads);
        // Barrier #1 is for synchronization among active threads.
        named_sync(L1_BARRIER, threads);
      }
    } else {
      // Still need to flush the memory per the standard.
      __kmpc_flush(loc_ref);
    } // numberOfActiveOMPThreads > 1
    PRINT0(LD_SYNC, "completed kmpc_barrier\n");
  }
}

// Emit a simple barrier call in SPMD mode.  Assumes the caller is in an L0
// parallel region and that all worker threads participate.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
  __kmpc_impl_syncthreads();
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}

// Emit a simple barrier call in Generic mode.  Assumes the caller is in an L0
// parallel region and that all worker threads participate.
EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
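  // The master warp does not participate in the workers' barrier, so exclude
  // it from the thread count.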
  int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE;
  // The #threads parameter must be rounded up to the WARPSIZE.
  int threads =
      WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);

  PRINT(LD_SYNC,
        "call kmpc_barrier_simple_generic with %d omp threads, sync parameter "
        "%d\n",
        (int)numberOfActiveOMPThreads, (int)threads);
  // Barrier #1 is for synchronization among active threads.
  named_sync(L1_BARRIER, threads);
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
}

////////////////////////////////////////////////////////////////////////////////
// KMP MASTER
////////////////////////////////////////////////////////////////////////////////

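// Master is implemented by letting only the team master enter the region;
// __kmpc_end_master merely asserts that the caller is the master.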
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_master\n");
  return IsTeamMaster(global_tid);
}

EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_master\n");
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}

////////////////////////////////////////////////////////////////////////////////
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////

EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_single\n");
  // Single is implemented with master: the master thread gets the single.
  return IsTeamMaster(global_tid);
}

EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  PRINT0(LD_IO, "call kmpc_end_single\n");
  // Single is implemented with master: the master thread gets the single.
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
  // The synchronization barrier is called explicitly, so that is not a problem.
}

////////////////////////////////////////////////////////////////////////////////
// Flush
////////////////////////////////////////////////////////////////////////////////

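// Flush is implemented as a device-wide memory fence.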
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  PRINT0(LD_IO, "call kmpc_flush\n");
  __threadfence();
}

////////////////////////////////////////////////////////////////////////////////
// Vote
////////////////////////////////////////////////////////////////////////////////

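// Return the mask of lanes that are currently active in the calling warp.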
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
  return __kmpc_impl_activemask();
}

////////////////////////////////////////////////////////////////////////////////
// Syncwarp
////////////////////////////////////////////////////////////////////////////////

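// Synchronize the lanes of the calling warp that are named in Mask.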
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");
  __kmpc_impl_syncwarp(Mask);
}