1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
| //===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar and OpenMP parallel loops
// as LLVM-IR.
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_LOOP_GENERATORS_H
#define POLLY_LOOP_GENERATORS_H
#include "polly/CodeGen/IRBuilder.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/SetVector.h"
namespace polly {
using namespace llvm;
/// General scheduling types of parallel OpenMP for loops.
/// Initialization values taken from OpenMP's enum in kmp.h: sched_type.
/// Currently, only 'static' scheduling may change from chunked to non-chunked.
enum class OMPGeneralSchedulingType {
StaticChunked = 33,
StaticNonChunked = 34,
Dynamic = 35,
Guided = 36,
Runtime = 37
};
extern int PollyNumThreads;
extern OMPGeneralSchedulingType PollyScheduling;
extern int PollyChunkSize;
/// Create a scalar do/for-style loop.
///
/// @param LowerBound The starting value of the induction variable.
/// @param UpperBound The upper bound of the induction variable.
/// @param Stride The value by which the induction variable
/// is incremented.
///
/// @param Builder The builder used to create the loop.
/// @param P A pointer to the pass that uses this function.
/// It is used to update analysis information.
/// @param LI The loop info for the current function
/// @param DT The dominator tree we need to update
/// @param ExitBlock The block the loop will exit to.
/// @param Predicate The predicate used to generate the upper loop
/// bound.
/// @param Annotator This function can (optionally) take
/// a ScopAnnotator which
/// annotates loops and alias information in the SCoP.
/// @param Parallel If this loop should be marked parallel in
/// the Annotator.
/// @param UseGuard Create a guard in front of the header to check if
/// the loop is executed at least once, otherwise just
/// assume it.
/// @param LoopVectDisabled If the Loop vectorizer should be disabled for this
/// loop.
///
/// @return Value* The newly created induction variable for this loop.
Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT,
BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate,
ScopAnnotator *Annotator = NULL, bool Parallel = false,
bool UseGuard = true, bool LoopVectDisabled = false);
/// The ParallelLoopGenerator allows to create parallelized loops
///
/// To parallelize a loop, we perform the following steps:
/// o Generate a subfunction which will hold the loop body.
/// o Create a struct to hold all outer values needed in the loop body.
/// o Create calls to a runtime library to achieve the actual parallelism.
/// These calls will spawn and join threads, define how the work (here the
/// iterations) are distributed between them and make sure each has access
/// to the struct holding all needed values.
///
/// At the moment we support only one parallel runtime, OpenMP.
///
/// If we parallelize the outer loop of the following loop nest,
///
/// S0;
/// for (int i = 0; i < N; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// S2;
///
/// we will generate the following code (with different runtime function names):
///
/// S0;
/// auto *values = storeValuesIntoStruct();
/// // Execute subfunction with multiple threads
/// spawn_threads(subfunction, values);
/// join_threads();
/// S2;
///
/// // This function is executed in parallel by different threads
/// void subfunction(values) {
/// while (auto *WorkItem = getWorkItem()) {
/// int LB = WorkItem.begin();
/// int UB = WorkItem.end();
/// for (int i = LB; i < UB; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// }
/// cleanup_thread();
/// }
class ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: Builder(Builder), LI(LI), DT(DT),
LongType(
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()) {}
virtual ~ParallelLoopGenerator() {}
/// Create a parallel loop.
///
/// This function is the main function to automatically generate a parallel
/// loop with all its components.
///
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param LoopBody A pointer to an iterator that is set to point to the
/// body of the created loop. It should be used to insert
/// instructions that form the actual loop body.
///
/// @return The newly created induction variable for this loop.
Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
SetVector<Value *> &Values, ValueMapT &VMap,
BasicBlock::iterator *LoopBody);
protected:
/// The IR builder we use to create instructions.
PollyIRBuilder &Builder;
/// The loop info of the current function we need to update.
LoopInfo &LI;
/// The dominance tree of the current function we need to update.
DominatorTree &DT;
/// The type of a "long" on this hardware used for backend calls.
Type *LongType;
/// The current module
Module *M;
public:
/// Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
///
/// @return The created struct.
AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values);
/// Extract all values from the @p Struct and construct the mapping.
///
/// @param Values The values which were stored in the struct.
/// @param Struct The struct holding all the values in @p Values.
/// @param VMap A map to associate every element of @p Values with the
/// new llvm value loaded from the @p Struct.
void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty,
Value *Struct, ValueMapT &VMap);
/// Create the definition of the parallel subfunction.
///
/// @return A pointer to the subfunction.
Function *createSubFnDefinition();
/// Create the runtime library calls for spawn and join of the worker threads.
/// Additionally, places a call to the specified subfunction.
///
/// @param SubFn The subfunction which holds the loop body.
/// @param SubFnParam The parameter for the subfunction (basically the struct
/// filled with the outside values).
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
virtual void deployParallelExecution(Value *SubFn, Value *SubFnParam,
Value *LB, Value *UB, Value *Stride) = 0;
/// Prepare the definition of the parallel subfunction.
/// Creates the argument list and names them (as well as the subfunction).
///
/// @param F A pointer to the (parallel) subfunction's parent function.
///
/// @return The pointer to the (parallel) subfunction.
virtual Function *prepareSubFnDefinition(Function *F) const = 0;
/// Create the parallel subfunction.
///
/// @param Stride The induction variable increment.
/// @param Struct A struct holding all values in @p Values.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param SubFn The newly created subfunction is returned here.
///
/// @return The newly created induction variable.
virtual std::tuple<Value *, Function *>
createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues,
ValueMapT &VMap) = 0;
};
} // end namespace polly
#endif
|