reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
//===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar and OpenMP parallel loops
// as LLVM-IR.
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_LOOP_GENERATORS_KMP_H
#define POLLY_LOOP_GENERATORS_KMP_H

#include "polly/CodeGen/IRBuilder.h"
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/SetVector.h"

namespace polly {
using namespace llvm;

/// This ParallelLoopGenerator subclass handles the generation of parallelized
/// code, utilizing the LLVM OpenMP library.
class ParallelLoopGeneratorKMP : public ParallelLoopGenerator {
public:
  /// Create a parallel loop generator for the current function.
  ///
  /// All arguments are forwarded unchanged to the ParallelLoopGenerator base
  /// class. SourceLocationInfo is initialized from createSourceLocation();
  /// the base class is fully constructed before that call runs.
  ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
                           DominatorTree &DT, const DataLayout &DL)
      : ParallelLoopGenerator(Builder, LI, DT, DL),
        SourceLocationInfo(createSourceLocation()) {}

protected:
  /// The source location struct of this loop.
  /// ident_t = type { i32, i32, i32, i32, i8* }
  GlobalValue *SourceLocationInfo;

  /// Map the combination of a chunk size and a scheduling type (either of
  /// which might have been set via the command line) to the scheduling type
  /// that corresponds to it. The returned type may differ from the provided
  /// one, e.g. "static chunked" scheduling may become "static non-chunked".
  ///
  /// @param ChunkSize    The chunk size, set via command line or its default.
  /// @param Scheduling   The scheduling, set via command line or its default.
  ///
  /// @return The corresponding OMPGeneralSchedulingType.
  OMPGeneralSchedulingType
  getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const;

  /// Check the width of 'LongType'.
  ///
  /// @return True if 'LongType' is 64 bits wide, false otherwise.
  bool is64BitArch();

public:
  // The functions below may be used if one does not want to generate a
  // specific OpenMP parallel loop, but generate individual parts of it
  // (e.g. the subfunction definition).

  /// Create a runtime library call to spawn the worker threads.
  ///
  /// @param SubFn      The subfunction which holds the loop body.
  /// @param SubFnParam The parameter for the subfunction (basically the struct
  ///                   filled with the outside values).
  /// @param LB         The lower bound for the loop we parallelize.
  /// @param UB         The upper bound for the loop we parallelize.
  /// @param Stride     The stride of the loop we parallelize.
  void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
                              Value *UB, Value *Stride);

  /// Override of ParallelLoopGenerator::deployParallelExecution.
  ///
  /// The parameters carry the same names as those of
  /// createCallSpawnThreads(); the contract itself is defined by the base
  /// class declaration.
  void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB,
                               Value *UB, Value *Stride) override;

  /// Override of ParallelLoopGenerator::prepareSubFnDefinition.
  ///
  /// The contract is defined by the base class declaration.
  ///
  /// @param F The function handed over by the base class.
  ///
  /// @return A Function pointer, as specified by the base class.
  Function *prepareSubFnDefinition(Function *F) const override;

  /// Override of ParallelLoopGenerator::createSubFn.
  ///
  /// The contract is defined by the base class declaration. Note that
  /// UsedValues is taken by value, matching the overridden signature.
  std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
                                              SetVector<Value *> UsedValues,
                                              ValueMapT &VMap) override;

  /// Create a runtime library call to get the current global thread number.
  ///
  /// @return A Value ref which holds the current global thread number.
  Value *createCallGlobalThreadNum();

  /// Create a runtime library call to request a number of threads, which
  /// will be used in the next OpenMP section (by the next fork).
  ///
  /// @param GlobalThreadID   The global thread ID.
  /// @param NumThreads       The number of threads to use.
  void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads);

  /// Create a runtime library call to prepare the OpenMP runtime for a
  /// dynamically scheduled loop, saving the loop arguments.
  ///
  /// @param GlobalThreadID   The global thread ID.
  /// @param LB               The loop's lower bound.
  /// @param UB               The loop's upper bound.
  /// @param Inc              The loop increment.
  /// @param ChunkSize        The chunk size of the parallel loop.
  void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB,
                              Value *Inc, Value *ChunkSize);

  /// Create a runtime library call to retrieve the next (dynamically)
  /// allocated chunk of work for this thread.
  ///
  /// @param GlobalThreadID   The global thread ID.
  /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
  ///                         the last chunk of work, or 0 otherwise.
  /// @param LBPtr            Pointer to the lower bound for the next chunk.
  /// @param UBPtr            Pointer to the upper bound for the next chunk.
  /// @param StridePtr        Pointer to the stride for the next chunk.
  ///
  /// @return A Value which holds 1 if there is work to be done, 0 otherwise.
  Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr,
                                Value *LBPtr, Value *UBPtr, Value *StridePtr);

  /// Create a runtime library call to prepare the OpenMP runtime for a
  /// statically scheduled loop, saving the loop arguments.
  ///
  /// @param GlobalThreadID   The global thread ID.
  /// @param IsLastPtr        Pointer to a flag, which is set to 1 if this is
  ///                         the last chunk of work, or 0 otherwise.
  /// @param LBPtr            Pointer to the lower bound for the next chunk.
  /// @param UBPtr            Pointer to the upper bound for the next chunk.
  /// @param StridePtr        Pointer to the stride for the next chunk.
  /// @param ChunkSize        The chunk size of the parallel loop.
  void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr,
                            Value *LBPtr, Value *UBPtr, Value *StridePtr,
                            Value *ChunkSize);

  /// Create a runtime library call to mark the end of
  /// a statically scheduled loop.
  ///
  /// @param GlobalThreadID   The global thread ID.
  void createCallStaticFini(Value *GlobalThreadID);

  /// Create the current source location.
  ///
  /// TODO: Generates only(!) dummy values.
  ///
  /// @return The GlobalVariable created for the source location; the
  ///         constructor stores it in SourceLocationInfo.
  GlobalVariable *createSourceLocation();
};
} // end namespace polly
#endif