reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the interface for the loop cache analysis.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

class LPMUpdater;
using CacheCostTy = int64_t;
using LoopVectorTy = SmallVector<Loop *, 8>;

/// Represents a memory reference as a base pointer and a set of indexing
/// operations. For example given the array reference A[i][2j+1][3k+2] in a
/// 3-dim loop nest:
///   for(i=0;i<n;++i)
///     for(j=0;j<m;++j)
///       for(k=0;k<o;++k)
///         ... A[i][2j+1][3k+2] ...
/// We expect:
///   BasePointer -> A
///   Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
///   Sizes -> [m][o][4]
class IndexedReference {
  friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);

public:
  /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
  IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
                   ScalarEvolution &SE);

  bool isValid() const { return IsValid; }
  const SCEV *getBasePointer() const { return BasePointer; }
  size_t getNumSubscripts() const { return Subscripts.size(); }
  const SCEV *getSubscript(unsigned SubNum) const {
    assert(SubNum < getNumSubscripts() && "Invalid subscript number");
    return Subscripts[SubNum];
  }
  const SCEV *getFirstSubscript() const {
    assert(!Subscripts.empty() && "Expecting non-empty container");
    return Subscripts.front();
  }
  const SCEV *getLastSubscript() const {
    assert(!Subscripts.empty() && "Expecting non-empty container");
    return Subscripts.back();
  }

  /// Return true/false if the current object and the indexed reference \p Other
  /// are/aren't in the same cache line of size \p CLS. Two references are in
  /// the same chace line iff the distance between them in the innermost
  /// dimension is less than the cache line size. Return None if unsure.
  Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
                                 AliasAnalysis &AA) const;

  /// Return true if the current object and the indexed reference \p Other
  /// have distance smaller than \p MaxDistance in the dimension associated with
  /// the given loop \p L. Return false if the distance is not smaller than \p
  /// MaxDistance and None if unsure.
  Optional<bool> hasTemporalReuse(const IndexedReference &Other,
                                  unsigned MaxDistance, const Loop &L,
                                  DependenceInfo &DI, AliasAnalysis &AA) const;

  /// Compute the cost of the reference w.r.t. the given loop \p L when it is
  /// considered in the innermost position in the loop nest.
  /// The cost is defined as:
  ///   - equal to one if the reference is loop invariant, or
  ///   - equal to '(TripCount * stride) / cache_line_size' if:
  ///     + the reference stride is less than the cache line size, and
  ///     + the coefficient of this loop's index variable used in all other
  ///       subscripts is zero
  ///   - or otherwise equal to 'TripCount'.
  CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;

private:
  /// Attempt to delinearize the indexed reference.
  bool delinearize(const LoopInfo &LI);

  /// Return true if the index reference is invariant with respect to loop \p L.
  bool isLoopInvariant(const Loop &L) const;

  /// Return true if the indexed reference is 'consecutive' in loop \p L.
  /// An indexed reference is 'consecutive' if the only coefficient that uses
  /// the loop induction variable is the rightmost one, and the access stride is
  /// smaller than the cache line size \p CLS.
  bool isConsecutive(const Loop &L, unsigned CLS) const;

  /// Return the coefficient used in the rightmost dimension.
  const SCEV *getLastCoefficient() const;

  /// Return true if the coefficient corresponding to induction variable of
  /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
  bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
                                     const Loop &L) const;

  /// Verify that the given \p Subscript is 'well formed' (must be a simple add
  /// recurrence).
  bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;

  /// Return true if the given reference \p Other is definetely aliased with
  /// the indexed reference represented by this class.
  bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;

private:
  /// True if the reference can be delinearized, false otherwise.
  bool IsValid = false;

  /// Represent the memory reference instruction.
  Instruction &StoreOrLoadInst;

  /// The base pointer of the memory reference.
  const SCEV *BasePointer = nullptr;

  /// The subscript (indexes) of the memory reference.
  SmallVector<const SCEV *, 3> Subscripts;

  /// The dimensions of the memory reference.
  SmallVector<const SCEV *, 3> Sizes;

  ScalarEvolution &SE;
};

/// A reference group represents a set of memory references that exhibit
/// temporal or spacial reuse. Two references belong to the same
/// reference group with respect to a inner loop L iff:
/// 1. they have a loop independent dependency, or
/// 2. they have a loop carried dependence with a small dependence distance
///    (e.g. less than 2) carried by the inner loop, or
/// 3. they refer to the same array, and the subscript in their innermost
///    dimension is less than or equal to 'd' (where 'd' is less than the cache
///    line size)
///
/// Intuitively a reference group represents memory references that access
/// the same cache line. Conditions 1,2 above account for temporal reuse, while
/// contition 3 accounts for spacial reuse.
using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;

/// \c CacheCost represents the estimated cost of a inner loop as the number of
/// cache lines used by the memory references it contains.
/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
/// the cache costs of all of its reference groups when the loop is considered
/// to be in the innermost position in the nest.
/// A reference group represents memory references that fall into the same cache
/// line. Each reference group is analysed with respect to the innermost loop in
/// a loop nest. The cost of a reference is defined as follow:
///  - one if it is loop invariant w.r.t the innermost loop,
///  - equal to the loop trip count divided by the cache line times the
///    reference stride if the reference stride is less than the cache line
///    size (CLS), and the coefficient of this loop's index variable used in all
///    other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
///  - equal to the innermost loop trip count if the reference stride is greater
///    or equal to the cache line size CLS.
class CacheCost {
  friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
  using LoopTripCountTy = std::pair<const Loop *, unsigned>;
  using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;

public:
  static CacheCostTy constexpr InvalidCost = -1;

  /// Construct a CacheCost object for the loop nest described by \p Loops.
  /// The optional parameter \p TRT can be used to specify the max. distance
  /// between array elements accessed in a loop so that the elements are
  /// classified to have temporal reuse.
  CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
            TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
            Optional<unsigned> TRT = None);

  /// Create a CacheCost for the loop nest rooted by \p Root.
  /// The optional parameter \p TRT can be used to specify the max. distance
  /// between array elements accessed in a loop so that the elements are
  /// classified to have temporal reuse.
  static std::unique_ptr<CacheCost>
  getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
               Optional<unsigned> TRT = None);

  /// Return the estimated cost of loop \p L if the given loop is part of the
  /// loop nest associated with this object. Return -1 otherwise.
  CacheCostTy getLoopCost(const Loop &L) const {
    auto IT = std::find_if(
        LoopCosts.begin(), LoopCosts.end(),
        [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; });
    return (IT != LoopCosts.end()) ? (*IT).second : -1;
  }

  /// Return the estimated ordered loop costs.
  const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }

private:
  /// Calculate the cache footprint of each loop in the nest (when it is
  /// considered to be in the innermost position).
  void calculateCacheFootprint();

  /// Partition store/load instructions in the loop nest into reference groups.
  /// Two or more memory accesses belong in the same reference group if they
  /// share the same cache line.
  bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;

  /// Calculate the cost of the given loop \p L assuming it is the innermost
  /// loop in nest.
  CacheCostTy computeLoopCacheCost(const Loop &L,
                                   const ReferenceGroupsTy &RefGroups) const;

  /// Compute the cost of a representative reference in reference group \p RG
  /// when the given loop \p L is considered as the innermost loop in the nest.
  /// The computed cost is an estimate for the number of cache lines used by the
  /// reference group. The representative reference cost is defined as:
  ///   - equal to one if the reference is loop invariant, or
  ///   - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
  ///     induction variable is used only in the reference subscript associated
  ///     with loop \p L, and (b) the reference stride is less than the cache
  ///     line size, or
  ///   - TripCount otherwise
  CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
                                       const Loop &L) const;

  /// Sort the LoopCosts vector by decreasing cache cost.
  void sortLoopCosts() {
    sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
      return A.second > B.second;
    });
  }

private:
  /// Loops in the loop nest associated with this object.
  LoopVectorTy Loops;

  /// Trip counts for the loops in the loop nest associated with this object.
  SmallVector<LoopTripCountTy, 3> TripCounts;

  /// Cache costs for the loops in the loop nest associated with this object.
  SmallVector<LoopCacheCostTy, 3> LoopCosts;

  /// The max. distance between array elements accessed in a loop so that the
  /// elements are classified to have temporal reuse.
  Optional<unsigned> TRT;

  const LoopInfo &LI;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  AliasAnalysis &AA;
  DependenceInfo &DI;
};

raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);

/// Printer pass for the \c CacheCost results.
class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
  raw_ostream &OS;

public:
  explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}

  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
};

} // namespace llvm

#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H