reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass hoists and/or decomposes/recomposes integer division and remainder
// instructions to enable CFG improvements and better codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "div-rem-pairs"
STATISTIC(NumPairs, "Number of div/rem pairs");
STATISTIC(NumRecomposed, "Number of instructions recomposed");
STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumDecomposed, "Number of instructions decomposed");
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
              "Controls transformations in div-rem-pairs pass");

namespace {
struct ExpandedMatch {
  DivRemMapKey Key;
  Instruction *Value;
};
} // namespace

/// See if we can match: (which is the form we expand into)
///   X - ((X ?/ Y) * Y)
/// which is equivalent to:
///   X ?% Y
static llvm::Optional<ExpandedMatch> matchExpandedRem(Instruction &I) {
  Value *Dividend, *XroundedDownToMultipleOfY;
  if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))
    return llvm::None;

  Value *Divisor;
  Instruction *Div;
  // Look for  ((X / Y) * Y)
  if (!match(
          XroundedDownToMultipleOfY,
          m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),
                               m_Instruction(Div)),
                  m_Deferred(Divisor))))
    return llvm::None;

  ExpandedMatch M;
  M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;
  M.Key.Dividend = Dividend;
  M.Key.Divisor = Divisor;
  M.Value = &I;
  return M;
}

/// A thin wrapper to store two values that we matched as div-rem pair.
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
struct DivRemPairWorklistEntry {
  /// The actual udiv/sdiv instruction. Source of truth.
  AssertingVH<Instruction> DivInst;

  /// The instruction that we have matched as a remainder instruction.
  /// Should only be used as Value, don't introspect it.
  AssertingVH<Instruction> RemInst;

  DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
      : DivInst(DivInst_), RemInst(RemInst_) {
    assert((DivInst->getOpcode() == Instruction::UDiv ||
            DivInst->getOpcode() == Instruction::SDiv) &&
           "Not a division.");
    assert(DivInst->getType() == RemInst->getType() && "Types should match.");
    // We can't check anything else about remainder instruction,
    // it's not strictly required to be a urem/srem.
  }

  /// The type for this pair, identical for both the div and rem.
  Type *getType() const { return DivInst->getType(); }

  /// Is this pair signed or unsigned?
  bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }

  /// In this pair, what are the divident and divisor?
  Value *getDividend() const { return DivInst->getOperand(0); }
  Value *getDivisor() const { return DivInst->getOperand(1); }

  bool isRemExpanded() const {
    switch (RemInst->getOpcode()) {
    case Instruction::SRem:
    case Instruction::URem:
      return false; // single 'rem' instruction - unexpanded form.
    default:
      return true; // anything else means we have remainder in expanded form.
    }
  }
};
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;

/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). Place those pairs into a worklist for further
/// processing. This indirection is needed because we have to use TrackingVH<>
/// because we will be doing RAUW, and if one of the rem instructions we change
/// happens to be an input to another div/rem in the maps, we'd have problems.
static DivRemWorklistTy getWorklist(Function &F) {
  // Insert all divide and remainder instructions into maps keyed by their
  // operands and opcode (signed or unsigned).
  DenseMap<DivRemMapKey, Instruction *> DivMap;
  // Use a MapVector for RemMap so that instructions are moved/inserted in a
  // deterministic order.
  MapVector<DivRemMapKey, Instruction *> RemMap;
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.getOpcode() == Instruction::SDiv)
        DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
      else if (I.getOpcode() == Instruction::UDiv)
        DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
      else if (I.getOpcode() == Instruction::SRem)
        RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
      else if (I.getOpcode() == Instruction::URem)
        RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
      else if (auto Match = matchExpandedRem(I))
        RemMap[Match->Key] = Match->Value;
    }
  }

  // We'll accumulate the matching pairs of div-rem instructions here.
  DivRemWorklistTy Worklist;

  // We can iterate over either map because we are only looking for matched
  // pairs. Choose remainders for efficiency because they are usually even more
  // rare than division.
  for (auto &RemPair : RemMap) {
    // Find the matching division instruction from the division map.
    Instruction *DivInst = DivMap[RemPair.first];
    if (!DivInst)
      continue;

    // We have a matching pair of div/rem instructions.
    NumPairs++;
    Instruction *RemInst = RemPair.second;

    // Place it in the worklist.
    Worklist.emplace_back(DivInst, RemInst);
  }

  return Worklist;
}

/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
/// them together by hoisting or replace the common division operation that is
/// implicit in the remainder:
/// X % Y <--> X - ((X / Y) * Y).
///
/// We can largely ignore the normal safety and cost constraints on speculation
/// of these ops when we find a matching pair. This is because we are already
/// guaranteed that any exceptions and most cost are already incurred by the
/// first member of the pair.
///
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
/// SimplifyCFG, but it's split off on its own because it's different enough
/// that it doesn't quite match the stated objectives of those passes.
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
                           const DominatorTree &DT) {
  bool Changed = false;

  // Get the matching pairs of div-rem instructions. We want this extra
  // indirection to avoid dealing with having to RAUW the keys of the maps.
  DivRemWorklistTy Worklist = getWorklist(F);

  // Process each entry in the worklist.
  for (DivRemPairWorklistEntry &E : Worklist) {
    if (!DebugCounter::shouldExecute(DRPCounter))
      continue;

    bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());

    auto &DivInst = E.DivInst;
    auto &RemInst = E.RemInst;

    const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();
    (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning

    if (HasDivRemOp && E.isRemExpanded()) {
      // The target supports div+rem but the rem is expanded.
      // We should recompose it first.
      Value *X = E.getDividend();
      Value *Y = E.getDivisor();
      Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)
                                          : BinaryOperator::CreateURem(X, Y);
      // Note that we place it right next to the original expanded instruction,
      // and letting further handling to move it if needed.
      RealRem->setName(RemInst->getName() + ".recomposed");
      RealRem->insertAfter(RemInst);
      Instruction *OrigRemInst = RemInst;
      // Update AssertingVH<> with new instruction so it doesn't assert.
      RemInst = RealRem;
      // And replace the original instruction with the new one.
      OrigRemInst->replaceAllUsesWith(RealRem);
      OrigRemInst->eraseFromParent();
      NumRecomposed++;
      // Note that we have left ((X / Y) * Y) around.
      // If it had other uses we could rewrite it as X - X % Y
    }

    assert((!E.isRemExpanded() || !HasDivRemOp) &&
           "*If* the target supports div-rem, then by now the RemInst *is* "
           "Instruction::[US]Rem.");

    // If the target supports div+rem and the instructions are in the same block
    // already, there's nothing to do. The backend should handle this. If the
    // target does not support div+rem, then we will decompose the rem.
    if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
      continue;

    bool DivDominates = DT.dominates(DivInst, RemInst);
    if (!DivDominates && !DT.dominates(RemInst, DivInst)) {
      // We have matching div-rem pair, but they are in two different blocks,
      // neither of which dominates one another.
      // FIXME: We could hoist both ops to the common predecessor block?
      continue;
    }

    // The target does not have a single div/rem operation,
    // and the rem is already in expanded form. Nothing to do.
    if (!HasDivRemOp && E.isRemExpanded())
      continue;

    if (HasDivRemOp) {
      // The target has a single div/rem operation. Hoist the lower instruction
      // to make the matched pair visible to the backend.
      if (DivDominates)
        RemInst->moveAfter(DivInst);
      else
        DivInst->moveAfter(RemInst);
      NumHoisted++;
    } else {
      // The target does not have a single div/rem operation,
      // and the rem is *not* in a already-expanded form.
      // Decompose the remainder calculation as:
      // X % Y --> X - ((X / Y) * Y).

      assert(!RemOriginallyWasInExpandedForm &&
             "We should not be expanding if the rem was in expanded form to "
             "begin with.");

      Value *X = E.getDividend();
      Value *Y = E.getDivisor();
      Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
      Instruction *Sub = BinaryOperator::CreateSub(X, Mul);

      // If the remainder dominates, then hoist the division up to that block:
      //
      // bb1:
      //   %rem = srem %x, %y
      // bb2:
      //   %div = sdiv %x, %y
      // -->
      // bb1:
      //   %div = sdiv %x, %y
      //   %mul = mul %div, %y
      //   %rem = sub %x, %mul
      //
      // If the division dominates, it's already in the right place. The mul+sub
      // will be in a different block because we don't assume that they are
      // cheap to speculatively execute:
      //
      // bb1:
      //   %div = sdiv %x, %y
      // bb2:
      //   %rem = srem %x, %y
      // -->
      // bb1:
      //   %div = sdiv %x, %y
      // bb2:
      //   %mul = mul %div, %y
      //   %rem = sub %x, %mul
      //
      // If the div and rem are in the same block, we do the same transform,
      // but any code movement would be within the same block.

      if (!DivDominates)
        DivInst->moveBefore(RemInst);
      Mul->insertAfter(RemInst);
      Sub->insertAfter(Mul);

      // Now kill the explicit remainder. We have replaced it with:
      // (sub X, (mul (div X, Y), Y)
      Sub->setName(RemInst->getName() + ".decomposed");
      Instruction *OrigRemInst = RemInst;
      // Update AssertingVH<> with new instruction so it doesn't assert.
      RemInst = Sub;
      // And replace the original instruction with the new one.
      OrigRemInst->replaceAllUsesWith(Sub);
      OrigRemInst->eraseFromParent();
      NumDecomposed++;
    }
    Changed = true;
  }

  return Changed;
}

// Pass manager boilerplate below here.

namespace {
struct DivRemPairsLegacyPass : public FunctionPass {
  static char ID;
  DivRemPairsLegacyPass() : FunctionPass(ID) {
    initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    return optimizeDivRem(F, TTI, DT);
  }
};
} // namespace

char DivRemPairsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
                      "Hoist/decompose integer division and remainder", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
                    "Hoist/decompose integer division and remainder", false,
                    false)
FunctionPass *llvm::createDivRemPairsPass() {
  return new DivRemPairsLegacyPass();
}

PreservedAnalyses DivRemPairsPass::run(Function &F,
                                       FunctionAnalysisManager &FAM) {
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  if (!optimizeDivRem(F, TTI, DT))
    return PreservedAnalyses::all();
  // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  return PA;
}