reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
  372
  373
//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
#define LLVM_LIB_TABLEGEN_TGLEXER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace llvm {
class SourceMgr;
class SMLoc;
class Twine;

namespace tgtok {
  /// Token kinds known to the TableGen lexer.  No enumerator has an explicit
  /// value, so the numeric values follow the declaration order below.
  enum TokKind {
    // Special markers.
    Eof,
    Error,

    // Punctuation: tokens that carry no additional information.
    minus,    // -
    plus,     // +
    l_square, // [
    r_square, // ]
    l_brace,  // {
    r_brace,  // }
    l_paren,  // (
    r_paren,  // )
    less,     // <
    greater,  // >
    colon,    // :
    semi,     // ;
    comma,    // ,
    period,   // .
    equal,    // =
    question, // ?
    paste,    // #

    // Keywords.
    Bit, Bits, Class, Code,
    Dag, Def, Foreach, Defm,
    Field, In, Int, Let,
    List, MultiClass, String, Defset,

    // !keywords.
    XConcat, XADD, XMUL, XAND, XOR,
    XSRA, XSRL, XSHL,
    XListConcat, XListSplat, XStrConcat,
    XCast, XSubst, XForEach, XFoldl,
    XHead, XTail, XSize, XEmpty,
    XIf, XCond, XEq, XIsA, XDag,
    XNe, XLe, XLt, XGe, XGt,

    // Integer value.
    IntVal,

    // Binary constant.  Note that these are sized according to the number of
    // bits given.
    BinaryIntVal,

    // Tokens that carry a string value.
    Id,
    StrVal,
    VarName,
    CodeFragment,

    // Preprocessing tokens, used internally by the lexer only.
    // Lex() never returns any of them.
    Ifdef,
    Ifndef,
    Else,
    Endif,
    Define
  };
} // end namespace tgtok

/// TGLexer - TableGen Lexer class.
class TGLexer {
  SourceMgr &SrcMgr;

  const char *CurPtr;
  StringRef CurBuf;

  // Information about the current token.
  const char *TokStart;
  tgtok::TokKind CurCode;
  std::string CurStrVal;  // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
  int64_t CurIntVal;      // This is valid for INTVAL.

  /// CurBuffer - This is the current buffer index we're lexing from as managed
  /// by the SourceMgr object.
  unsigned CurBuffer;

public:
  typedef std::map<std::string, SMLoc> DependenciesMapTy;
private:
  /// Dependencies - This is the list of all included files.
  DependenciesMapTy Dependencies;

public:
  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);

  tgtok::TokKind Lex() {
    return CurCode = LexToken(CurPtr == CurBuf.begin());
  }

  const DependenciesMapTy &getDependencies() const {
    return Dependencies;
  }

  tgtok::TokKind getCode() const { return CurCode; }

  const std::string &getCurStrVal() const {
    assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
            CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
           "This token doesn't have a string value");
    return CurStrVal;
  }
  int64_t getCurIntVal() const {
    assert(CurCode == tgtok::IntVal && "This token isn't an integer");
    return CurIntVal;
  }
  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
    assert(CurCode == tgtok::BinaryIntVal &&
           "This token isn't a binary integer");
    return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
  }

  SMLoc getLoc() const;

private:
  /// LexToken - Read the next token and return its code.
  tgtok::TokKind LexToken(bool FileOrLineStart = false);

  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);

  int getNextChar();
  int peekNextChar(int Index) const;
  void SkipBCPLComment();
  bool SkipCComment();
  tgtok::TokKind LexIdentifier();
  bool LexInclude();
  tgtok::TokKind LexString();
  tgtok::TokKind LexVarName();
  tgtok::TokKind LexNumber();
  tgtok::TokKind LexBracket();
  tgtok::TokKind LexExclaim();

  // Process EOF encountered in LexToken().
  // If EOF is met in an include file, then the method will update
  // CurPtr, CurBuf and preprocessing include stack, and return true.
  // If EOF is met in the top-level file, then the method will
  // update and check the preprocessing include stack, and return false.
  bool processEOF();

  // *** Structures and methods for preprocessing support ***

  // A set of macro names that are defined either via command line or
  // by using:
  //     #define NAME
  StringSet<> DefinedMacros;

  // Each of #ifdef and #else directives has a descriptor associated
  // with it.
  //
  // An ordered list of preprocessing controls defined by #ifdef/#else
  // directives that are in effect currently is called preprocessing
  // control stack.  It is represented as a vector of PreprocessorControlDesc's.
  //
  // The control stack is updated according to the following rules:
  //
  // For each #ifdef we add an element to the control stack.
  // For each #else we replace the top element with a descriptor
  // with an inverted IsDefined value.
  // For each #endif we pop the top element from the control stack.
  //
  // When CurPtr reaches the current buffer's end, the control stack
  // must be empty, i.e. #ifdef and the corresponding #endif
  // must be located in the same file.
  struct PreprocessorControlDesc {
    // Either tgtok::Ifdef or tgtok::Else.
    tgtok::TokKind Kind;

    // True, if the condition for this directive is true, false - otherwise.
    // Examples:
    //     #ifdef NAME       : true, if NAME is defined, false - otherwise.
    //     ...
    //     #else             : false, if NAME is defined, true - otherwise.
    bool IsDefined;

    // Pointer into CurBuf to the beginning of the preprocessing directive
    // word, e.g.:
    //     #ifdef NAME
    //      ^ - SrcPos
    SMLoc SrcPos;
  };

  // We want to disallow code like this:
  //     file1.td:
  //         #define NAME
  //         #ifdef NAME
  //         include "file2.td"
  //     EOF
  //     file2.td:
  //         #endif
  //     EOF
  //
  // To do this, we clear the preprocessing control stack on entry
  // to each of the included file.  PrepIncludeStack is used to store
  // preprocessing control stacks for the current file and all its
  // parent files.  The back() element is the preprocessing control
  // stack for the current file.
  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
      PrepIncludeStack;

  // Validate that the current preprocessing control stack is empty,
  // since we are about to exit a file, and pop the include stack.
  //
  // If IncludeStackMustBeEmpty is true, the include stack must be empty
  // after the popping, otherwise, the include stack must not be empty
  // after the popping.  Basically, the include stack must be empty
  // only if we exit the "top-level" file (i.e. finish lexing).
  //
  // The method returns false, if the current preprocessing control stack
  // is not empty (e.g. there is an unterminated #ifdef/#else),
  // true - otherwise.
  bool prepExitInclude(bool IncludeStackMustBeEmpty);

  // Look ahead for a preprocessing directive starting from CurPtr.  The caller
  // must only call this method, if *(CurPtr - 1) is '#'.  If the method matches
  // a preprocessing directive word followed by a whitespace, then it returns
  // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
  //
  // CurPtr is not adjusted by this method.
  tgtok::TokKind prepIsDirective() const;

  // Given a preprocessing token kind, adjusts CurPtr to the end
  // of the preprocessing directive word.  Returns true, unless
  // an unsupported token kind is passed in.
  //
  // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
  // to avoid adjusting CurPtr before we are sure that '#' is followed
  // by a preprocessing directive.  If it is not, then we fall back to
  // tgtok::paste interpretation of '#'.
  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);

  // The main "exit" point from the token parsing to preprocessor.
  //
  // The method is called for CurPtr, when prepIsDirective() returns
  // true.  The first parameter matches the result of prepIsDirective(),
  // denoting the actual preprocessor directive to be processed.
  //
  // If the preprocessing directive disables the tokens processing, e.g.:
  //     #ifdef NAME // NAME is undefined
  // then lexPreprocessor() enters the lines-skipping mode.
  // In this mode, it does not parse any tokens, because the code under
  // the #ifdef may not even be a correct tablegen code.  The preprocessor
  // looks for lines containing other preprocessing directives, which
  // may be prepended with whitespaces and C-style comments.  If the line
  // does not contain a preprocessing directive, it is skipped completely.
  // Otherwise, the preprocessing directive is processed by recursively
  // calling lexPreprocessor().  The processing of the encountered
  // preprocessing directives includes updating preprocessing control stack
  // and adding new macros into DefinedMacros set.
  //
  // The second parameter controls whether lexPreprocessor() is called from
  // LexToken() (true) or recursively from lexPreprocessor() (false).
  //
  // If ReturnNextLiveToken is true, the method returns the next
  // LEX token following the current directive or following the end
  // of the disabled preprocessing region corresponding to this directive.
  // If ReturnNextLiveToken is false, the method returns the first parameter,
  // unless there were errors encountered in the disabled preprocessing
  // region - in this case, it returns tgtok::Error.
  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
                                 bool ReturnNextLiveToken = true);

  // Worker method for lexPreprocessor() to skip lines after some
  // preprocessing directive up to the buffer end or to the directive
  // that re-enables token processing.  The method returns true
  // upon processing the next directive that re-enables tokens
  // processing.  False is returned if an error was encountered.
  //
  // Note that prepSkipRegion() calls lexPreprocessor() to process
  // encountered preprocessing directives.  In this case, the second
  // parameter to lexPreprocessor() is set to false.  Being passed
  // false ReturnNextLiveToken, lexPreprocessor() must never call
  // prepSkipRegion().  We assert this by passing ReturnNextLiveToken
  // to prepSkipRegion() and checking that it is never set to false.
  bool prepSkipRegion(bool MustNeverBeFalse);

  // Lex name of the macro after either #ifdef or #define.  We could have used
  // LexIdentifier(), but it has special handling of "include" word, which
  // could result in awkward diagnostic errors.  Consider:
  // ----
  // #ifdef include
  // class ...
  // ----
  // LexIdentifier() will engage LexInclude(), which will complain about
  // missing file with name "class".  Instead, prepLexMacroName() will treat
  // "include" as a normal macro name.
  //
  // On entry, CurPtr points to the end of a preprocessing directive word.
  // The method allows for whitespaces between the preprocessing directive
  // and the macro name.  The allowed whitespaces are ' ' and '\t'.
  //
  // If the first non-whitespace symbol after the preprocessing directive
  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
  // the method updates TokStart to the position of the first non-whitespace
  // symbol, sets CurPtr to the position of the macro name's last symbol,
  // and returns a string reference to the macro name.  Otherwise,
  // TokStart is set to the first non-whitespace symbol after the preprocessing
  // directive, and the method returns an empty string reference.
  //
  // In all cases, TokStart may be used to point to the word following
  // the preprocessing directive.
  StringRef prepLexMacroName();

  // Skip any whitespaces starting from CurPtr.  The method is used
  // only in the lines-skipping mode to find the first non-whitespace
  // symbol after or at CurPtr.  Allowed whitespaces are ' ', '\t', '\n'
  // and '\r'.  The method skips C-style comments as well, because
  // it is used to find the beginning of the preprocessing directive.
  // If we do not handle C-style comments the following code would
  // result in incorrect detection of a preprocessing directive:
  //     /*
  //     #ifdef NAME
  //     */
  // As long as we skip C-style comments, the following code is correctly
  // recognized as a preprocessing directive:
  //     /* first line comment
  //        second line comment */ #ifdef NAME
  //
  // The method returns true upon reaching the first non-whitespace symbol
  // or EOF, CurPtr is set to point to this symbol.  The method returns false,
  // if an error occured during skipping of a C-style comment.
  bool prepSkipLineBegin();

  // Skip any whitespaces or comments after a preprocessing directive.
  // The method returns true upon reaching either end of the line
  // or end of the file.  If there is a multiline C-style comment
  // after the preprocessing directive, the method skips
  // the comment, so the final CurPtr may point to one of the next lines.
  // The method returns false, if an error occured during skipping
  // C- or C++-style comment, or a non-whitespace symbol appears
  // after the preprocessing directive.
  //
  // The method maybe called both during lines-skipping and tokens
  // processing.  It actually verifies that only whitespaces or/and
  // comments follow a preprocessing directive.
  //
  // After the execution of this mehod, CurPtr points either to new line
  // symbol, buffer end or non-whitespace symbol following the preprocesing
  // directive.
  bool prepSkipDirectiveEnd();

  // Skip all symbols to the end of the line/file.
  // The method adjusts CurPtr, so that it points to either new line
  // symbol in the current line or the buffer end.
  void prepSkipToLineEnd();

  // Return true, if the current preprocessor control stack is such that
  // we should allow lexer to process the next token, false - otherwise.
  //
  // In particular, the method returns true, if all the #ifdef/#else
  // controls on the stack have their IsDefined member set to true.
  bool prepIsProcessingEnabled();

  // Report an error, if we reach EOF with non-empty preprocessing control
  // stack.  This means there is no matching #endif for the previous
  // #ifdef/#else.
  void prepReportPreprocessorStackError();
};

} // end namespace llvm

#endif