reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
  372
  373
  374
  375
  376
  377
  378
  379
  380
  381
  382
  383
  384
  385
  386
  387
  388
  389
  390
  391
  392
  393
  394
  395
  396
  397
  398
  399
  400
  401
  402
  403
  404
  405
  406
  407
  408
  409
  410
  411
  412
  413
  414
  415
  416
  417
  418
  419
  420
  421
  422
  423
  424
  425
  426
//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//! \file
//! This pass performs merges of loads and stores on both sides of a
//  diamond (hammock). It hoists the loads and sinks the stores.
//
// The algorithm iteratively hoists two loads to the same address out of a
// diamond (hammock) and merges them into a single load in the header. Similar
// it sinks and merges two stores to the tail block (footer). The algorithm
// iterates over the instructions of one side of the diamond and attempts to
// find a matching load/store on the other side. New tail/footer block may be
// insterted if the tail/footer block has more predecessors (not only the two
// predecessors that are forming the diamond). It hoists / sinks when it thinks
// it safe to do so.  This optimization helps with eg. hiding load latencies,
// triggering if-conversion, and reducing static code size.
//
// NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
//
//===----------------------------------------------------------------------===//
//
//
// Example:
// Diamond shaped code before merge:
//
//            header:
//                     br %cond, label %if.then, label %if.else
//                        +                    +
//                       +                      +
//                      +                        +
//            if.then:                         if.else:
//               %lt = load %addr_l               %le = load %addr_l
//               <use %lt>                        <use %le>
//               <...>                            <...>
//               store %st, %addr_s               store %se, %addr_s
//               br label %if.end                 br label %if.end
//                     +                         +
//                      +                       +
//                       +                     +
//            if.end ("footer"):
//                     <...>
//
// Diamond shaped code after merge:
//
//            header:
//                     %l = load %addr_l
//                     br %cond, label %if.then, label %if.else
//                        +                    +
//                       +                      +
//                      +                        +
//            if.then:                         if.else:
//               <use %l>                         <use %l>
//               <...>                            <...>
//               br label %if.end                 br label %if.end
//                      +                        +
//                       +                      +
//                        +                    +
//            if.end ("footer"):
//                     %s.sink = phi [%st, if.then], [%se, if.else]
//                     <...>
//                     store %s.sink, %addr_s
//                     <...>
//
//
//===----------------------- TODO -----------------------------------------===//
//
// 1) Generalize to regions other than diamonds
// 2) Be more aggressive merging memory operations
// Note that both changes require register pressure control
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

#define DEBUG_TYPE "mldst-motion"

namespace {
//===----------------------------------------------------------------------===//
//                         MergedLoadStoreMotion Pass
//===----------------------------------------------------------------------===//
class MergedLoadStoreMotion {
  AliasAnalysis *AA = nullptr;

  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
  // where Size0 and Size1 are the #instructions on the two sides of
  // the diamond. The constant chosen here is arbitrary. Compiler Time
  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
  const int MagicCompileTimeControl = 250;

  const bool SplitFooterBB;
public:
  MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {}
  bool run(Function &F, AliasAnalysis &AA);

private:
  BasicBlock *getDiamondTail(BasicBlock *BB);
  bool isDiamondHead(BasicBlock *BB);
  // Routines for sinking stores
  StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
  PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
  bool isStoreSinkBarrierInRange(const Instruction &Start,
                                 const Instruction &End, MemoryLocation Loc);
  bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
  void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
                         StoreInst *ElseInst);
  bool mergeStores(BasicBlock *BB);
};
} // end anonymous namespace

///
/// Return tail block of a diamond.
///
BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
  assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
  return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
}

///
/// True when BB is the head of a diamond (hammock)
///
bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
  if (!BB)
    return false;
  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional())
    return false;

  BasicBlock *Succ0 = BI->getSuccessor(0);
  BasicBlock *Succ1 = BI->getSuccessor(1);

  if (!Succ0->getSinglePredecessor())
    return false;
  if (!Succ1->getSinglePredecessor())
    return false;

  BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
  BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
  // Ignore triangles.
  if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
    return false;
  return true;
}


///
/// True when instruction is a sink barrier for a store
/// located in Loc
///
/// Whenever an instruction could possibly read or modify the
/// value being stored or protect against the store from
/// happening it is considered a sink barrier.
///
bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                      const Instruction &End,
                                                      MemoryLocation Loc) {
  for (const Instruction &Inst :
       make_range(Start.getIterator(), End.getIterator()))
    if (Inst.mayThrow())
      return true;
  return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef);
}

///
/// Check if \p BB contains a store to the same address as \p SI
///
/// \return The store in \p  when it is safe to sink. Otherwise return Null.
///
StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
                                                   StoreInst *Store0) {
  LLVM_DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
  BasicBlock *BB0 = Store0->getParent();
  for (Instruction &Inst : reverse(*BB1)) {
    auto *Store1 = dyn_cast<StoreInst>(&Inst);
    if (!Store1)
      continue;

    MemoryLocation Loc0 = MemoryLocation::get(Store0);
    MemoryLocation Loc1 = MemoryLocation::get(Store1);
    if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
        !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
        !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
      return Store1;
    }
  }
  return nullptr;
}

///
/// Create a PHI node in BB for the operands of S0 and S1
///
PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
                                              StoreInst *S1) {
  // Create a phi if the values mismatch.
  Value *Opd1 = S0->getValueOperand();
  Value *Opd2 = S1->getValueOperand();
  if (Opd1 == Opd2)
    return nullptr;

  auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
                                &BB->front());
  NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
  NewPN->addIncoming(Opd1, S0->getParent());
  NewPN->addIncoming(Opd2, S1->getParent());
  return NewPN;
}

///
/// Check if 2 stores can be sunk together with corresponding GEPs
///
bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0,
                                                 StoreInst *S1) const {
  auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
  auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
  return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
         (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
         (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0);
}

///
/// Merge two stores to same address and sink into \p BB
///
/// Also sinks GEP instruction computing the store address
///
void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
                                              StoreInst *S1) {
  // Only one definition?
  auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
  auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
  LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
             dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
             dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
  // Hoist the instruction.
  BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
  // Intersect optional metadata.
  S0->andIRFlags(S1);
  S0->dropUnknownNonDebugMetadata();

  // Create the new store to be inserted at the join point.
  StoreInst *SNew = cast<StoreInst>(S0->clone());
  Instruction *ANew = A0->clone();
  SNew->insertBefore(&*InsertPt);
  ANew->insertBefore(SNew);

  assert(S0->getParent() == A0->getParent());
  assert(S1->getParent() == A1->getParent());

  // New PHI operand? Use it.
  if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
    SNew->setOperand(0, NewPN);
  S0->eraseFromParent();
  S1->eraseFromParent();
  A0->replaceAllUsesWith(ANew);
  A0->eraseFromParent();
  A1->replaceAllUsesWith(ANew);
  A1->eraseFromParent();
}

///
/// True when two stores are equivalent and can sink into the footer
///
/// Starting from a diamond head block, iterate over the instructions in one
/// successor block and try to match a store in the second successor.
///
bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) {

  bool MergedStores = false;
  BasicBlock *TailBB = getDiamondTail(HeadBB);
  BasicBlock *SinkBB = TailBB;
  assert(SinkBB && "Footer of a diamond cannot be empty");

  succ_iterator SI = succ_begin(HeadBB);
  assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors");
  BasicBlock *Pred0 = *SI;
  ++SI;
  assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor");
  BasicBlock *Pred1 = *SI;
  // tail block  of a diamond/hammock?
  if (Pred0 == Pred1)
    return false; // No.
  // bail out early if we can not merge into the footer BB
  if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3))
    return false;
  // #Instructions in Pred1 for Compile Time Control
  auto InstsNoDbg = Pred1->instructionsWithoutDebug();
  int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
  int NStores = 0;

  for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
       RBI != RBE;) {

    Instruction *I = &*RBI;
    ++RBI;

    // Don't sink non-simple (atomic, volatile) stores.
    auto *S0 = dyn_cast<StoreInst>(I);
    if (!S0 || !S0->isSimple())
      continue;

    ++NStores;
    if (NStores * Size1 >= MagicCompileTimeControl)
      break;
    if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
      if (!canSinkStoresAndGEPs(S0, S1))
        // Don't attempt to sink below stores that had to stick around
        // But after removal of a store and some of its feeding
        // instruction search again from the beginning since the iterator
        // is likely stale at this point.
        break;

      if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) {
        // We have more than 2 predecessors. Insert a new block
        // postdominating 2 predecessors we're going to sink from.
        SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split");
        if (!SinkBB)
          break;
      }

      MergedStores = true;
      sinkStoresAndGEPs(SinkBB, S0, S1);
      RBI = Pred0->rbegin();
      RBE = Pred0->rend();
      LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
    }
  }
  return MergedStores;
}

bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
  this->AA = &AA;

  bool Changed = false;
  LLVM_DEBUG(dbgs() << "Instruction Merger\n");

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  // This loop doesn't care about newly inserted/split blocks 
  // since they never will be diamond heads.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
    BasicBlock *BB = &*FI++;

    // Hoist equivalent loads and sink stores
    // outside diamonds when possible
    if (isDiamondHead(BB)) {
      Changed |= mergeStores(BB);
    }
  }
  return Changed;
}

namespace {
class MergedLoadStoreMotionLegacyPass : public FunctionPass {
  const bool SplitFooterBB;
public:
  static char ID; // Pass identification, replacement for typeid
  MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false)
      : FunctionPass(ID), SplitFooterBB(SplitFooterBB) {
    initializeMergedLoadStoreMotionLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  ///
  /// Run the transformation for each function
  ///
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    MergedLoadStoreMotion Impl(SplitFooterBB);
    return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
  }

private:
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    if (!SplitFooterBB)
      AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

char MergedLoadStoreMotionLegacyPass::ID = 0;
} // anonymous namespace

///
/// createMergedLoadStoreMotionPass - The public interface to this file.
///
FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) {
  return new MergedLoadStoreMotionLegacyPass(SplitFooterBB);
}

INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
                      "MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
                    "MergedLoadStoreMotion", false, false)

PreservedAnalyses
MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
  MergedLoadStoreMotion Impl(Options.SplitFooterBB);
  auto &AA = AM.getResult<AAManager>(F);
  if (!Impl.run(F, AA))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  if (!Options.SplitFooterBB)
    PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  return PA;
}