reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
//
//===----------------------------------------------------------------------===//

// Scheduling machine model for the Cortex-M4.
def CortexM4Model : SchedMachineModel {
  // The model is not marked complete; the predicates below are listed as
  // unsupported for it.
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt, IsNotMClass, HasDPVFP,
    HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8, HasV8_3a, HasTrustZone,
    HasDFB, IsWindows
  ];

  // Only IT can be dual-issued, so assume single-issue.
  let IssueWidth = 1;
  // In-order.
  let MicroOpBufferSize = 0;
  // Latency when not pipelined, not pc-relative.
  let LoadLatency = 2;
  // Best case branch taken cost.
  let MispredictPenalty = 2;

  let PostRAScheduler = 1;
}


// Cortex-M4 is in-order, so the entire cpu is modelled as one pipeline: a
// single resource with BufferSize = 0.

def M4Unit : ProcResource<1> {
  let BufferSize = 0;
}


let SchedModel = CortexM4Model in {

// Latency helpers applied to the different instructions below.
//
//   M4UnitL<N><write>  - WriteRes for `write` on M4Unit with Latency = N.
//   M4UnitL<N>_wr      - SchedWriteRes on M4Unit with Latency = N.
//   M4UnitL<N>I<instr> - InstRW mapping `instr` onto M4UnitL<N>_wr.

// Latency 1.
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 1;
}
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;

// Latency 2.
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 2;
}
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;

// Latency 3.
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}

// Latency 14.
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}


// Loads, MACs and DIV all get the higher latency of 2.
foreach W = [WriteLd, WriteMAC32, WriteMAC64Hi, WriteMAC64Lo, WriteMAC16,
             WriteDIV] in
  def : M4UnitL2<W>;

// Instructions matched by name that also get a latency of 2.
def : M4UnitL2I<(instregex "(t|t2)LDM")>;
def : M4UnitL2I<(instregex "(t|t2)LDR")>;


// Stores have no outputs, so they are given a latency of 1.

def : M4UnitL1<WriteST>;

def : M4UnitL1I<(instregex "(t|t2)STM")>;


// Everything else has a latency of 1.

foreach W = [WriteALU, WriteALUsi, WriteALUsr, WriteALUSsr,
             WriteBr, WriteBrL, WriteBrTbl,
             WriteCMPsi, WriteCMPsr, WriteCMP,
             WriteMUL32, WriteMUL64Hi, WriteMUL64Lo, WriteMUL16,
             WriteNoop, WritePreLd] in
  def : M4UnitL1<W>;

// Instructions matched by name that get a latency of 1.
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
def : M4UnitL1I<(instregex "t2CLREX")>;
def : M4UnitL1I<(instregex
    "t2SEL", "t2USAD8", "t2SML[AS]",
    "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)",
    "t2USADA8", "(t|t2)REV")>;

// The instructions below matter little for scheduling: either they will not
// be generated, or scheduling them is not very useful. They are listed only
// to make the model more complete.
def : M4UnitL1I<(instregex
    "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
def : M4UnitL1I<(instregex
    "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
def : M4UnitL1I<(instregex
    "t2?UDF$", "tBKPT", "t2DBG")>;
def : M4UnitL1I<(instregex
    "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
def : M4UnitL1I<(instregex
    "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
def : M4UnitL1I<(instregex
    "VSETLNi32", "VGETLNi32")>;

// These operand reads all get a ReadAdvance of 0.
foreach R = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
  def : ReadAdvance<R, 0>;

// FP instructions mostly have single-cycle latency; the exceptions are MACs,
// Divs and Sqrts. Loads still take 2 cycles.

// Latency 1: conversions, moves, ALU ops and multiplies.
foreach W = [WriteFPCVT, WriteFPMOV, WriteFPALU32, WriteFPALU64,
             WriteFPMUL32, WriteFPMUL64] in
  def : M4UnitL1<W>;

def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;

// Latency 3: MACs.
foreach W = [WriteFPMAC32, WriteFPMAC64] in
  def : M4UnitL3<W>;

// Latency 14: Divs and Sqrts.
foreach W = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in
  def : M4UnitL14<W>;

// Latency 1: the WriteVLD<n> / WriteVST<n> writes.
foreach W = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : M4UnitL1<W>;

def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
def : M4UnitL2I<(instregex "VMOVD")>;
def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;

// The FP multiply and MAC operand reads get a ReadAdvance of 0.
foreach R = [ReadFPMUL, ReadFPMAC] in
  def : ReadAdvance<R, 0>;

}