reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build the NVPTX (CUDA) Device RTL if the CUDA tools are available
#
##===----------------------------------------------------------------------===##

set(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER "" CACHE STRING
  "Path to alternate NVCC host compiler to be used by the NVPTX device RTL.")

if(LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER)
  find_program(ALTERNATE_CUDA_HOST_COMPILER NAMES ${LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER})
  if(NOT ALTERNATE_CUDA_HOST_COMPILER)
    libomptarget_say("Not building CUDA offloading device RTL: invalid NVPTX alternate host compiler.")
  endif()
  set(CUDA_HOST_COMPILER ${ALTERNATE_CUDA_HOST_COMPILER} CACHE FILEPATH "" FORCE)
endif()

# We can't use clang as nvcc host preprocessor, so we attempt to replace it with
# gcc.
if(CUDA_HOST_COMPILER MATCHES clang)

  find_program(LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER NAMES gcc)

  if(NOT LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER)
    libomptarget_say("Not building CUDA offloading device RTL: clang is not supported as NVCC host compiler.")
    libomptarget_say("Please include gcc in your path or set LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER to the full path of of valid compiler.")
    return()
  endif()
  set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
endif()

get_filename_component(devicertl_base_directory
  ${CMAKE_CURRENT_SOURCE_DIR}
  DIRECTORY)

if(LIBOMPTARGET_DEP_CUDA_FOUND)
  libomptarget_say("Building CUDA offloading device RTL.")

  # We really don't have any host code, so we don't need to care about
  # propagating host flags.
  set(CUDA_PROPAGATE_HOST_FLAGS OFF)

  set(cuda_src_files
      src/cancel.cu
      src/critical.cu
      src/data_sharing.cu
      src/libcall.cu
      src/loop.cu
      src/omptarget-nvptx.cu
      src/parallel.cu
      src/reduction.cu
      src/sync.cu
      src/task.cu
  )

  set(omp_data_objects src/omp_data.cu)

  # Get the compute capability the user requested or use SM_35 by default.
  # SM_35 is what clang uses by default.
  set(default_capabilities 35)
  if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
    set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
    libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
  endif()
  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
    "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
  string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})

  foreach(sm ${nvptx_sm_list})
    set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
  endforeach()

  # Activate RTL message dumps if requested by the user.
  set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
    "Activate NVPTX device RTL debug messages.")
  if(${LIBOMPTARGET_NVPTX_DEBUG})
    set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1 -g --ptxas-options=-v)
  endif()

  # NVPTX runtime library has to be statically linked. Dynamic linking is not
  # yet supported by the CUDA toolchain on the device.
  set(BUILD_SHARED_LIBS OFF)
  set(CUDA_SEPARABLE_COMPILATION ON)
  list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory})
  cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects}
      OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})

  # Install device RTL under the lib destination folder.
  install(TARGETS omptarget-nvptx ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")

  target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES})


  # Check if we can create an LLVM bitcode implementation of the runtime library
  # that could be inlined in the user application. For that we need to find
  # a Clang compiler capable of compiling our CUDA files to LLVM bitcode and
  # an LLVM linker.
  set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
    "Location of a CUDA compiler capable of emitting LLVM bitcode.")
  set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
    "Location of a linker capable of linking LLVM bitcode objects.")

  include(LibomptargetNVPTXBitcodeLibrary)

  set(bclib_default FALSE)
  if (${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
    set(bclib_default TRUE)
  endif()
  set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB ${bclib_default} CACHE BOOL
    "Enable CUDA LLVM bitcode offloading device RTL.")
  if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB})
    if (NOT ${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
      libomptarget_error_say("Cannot build CUDA LLVM bitcode offloading device RTL!")
    endif()
    libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")

    # Set flags for LLVM Bitcode compilation.
    set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS}
                 -I${devicertl_base_directory})
    if(${LIBOMPTARGET_NVPTX_DEBUG})
      set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=-1)
    else()
      set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=0)
    endif()

    # CUDA 9 header files use the nv_weak attribute which clang is not yet prepared
    # to handle. Therefore, we use 'weak' instead. We are compiling only for the
    # device, so it should be equivalent.
    if(CUDA_VERSION_MAJOR GREATER 8)
      set(bc_flags ${bc_flags} -Dnv_weak=weak)
    endif()

    # Create target to build all Bitcode libraries.
    add_custom_target(omptarget-nvptx-bc)

    # Generate a Bitcode library for all the compute capabilities the user requested.
    foreach(sm ${nvptx_sm_list})
      set(cuda_arch --cuda-gpu-arch=sm_${sm})

      # Compile CUDA files to bitcode.
      set(bc_files "")
      foreach(src ${cuda_src_files})
        get_filename_component(infile ${src} ABSOLUTE)
        get_filename_component(outfile ${src} NAME)

        add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
          COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}
            -c ${infile} -o ${outfile}-sm_${sm}.bc
          DEPENDS ${infile}
          IMPLICIT_DEPENDS CXX ${infile}
          COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
          VERBATIM
        )
        set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)

        list(APPEND bc_files ${outfile}-sm_${sm}.bc)
      endforeach()

      # Link to a bitcode library.
      add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
          COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
            -o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
          DEPENDS ${bc_files}
          COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
      )
      set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)

      add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
      add_dependencies(omptarget-nvptx-bc omptarget-nvptx-${sm}-bc)

      # Copy library to destination.
      add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
                         COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
                         $<TARGET_FILE_DIR:omptarget-nvptx>)

      # Install bitcode library under the lib destination folder.
      install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "${OPENMP_INSTALL_LIBDIR}")
    endforeach()
  endif()

  add_subdirectory(test)
else()
  libomptarget_say("Not building CUDA offloading device RTL: CUDA tools not found in the system.")
endif()