reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "FuzzySymbolIndex.h"
#include "llvm/Support/Regex.h"

using clang::find_all_symbols::SymbolAndSignals;
using llvm::StringRef;

namespace clang {
namespace include_fixer {
namespace {

class MemSymbolIndex : public FuzzySymbolIndex {
public:
  MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
    for (auto &Symbol : Symbols) {
      auto Tokens = tokenize(Symbol.Symbol.getName());
      this->Symbols.emplace_back(
          StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
          std::move(Symbol));
    }
  }

  std::vector<SymbolAndSignals> search(StringRef Query) override {
    auto Tokens = tokenize(Query);
    llvm::Regex Pattern("^" + queryRegexp(Tokens));
    std::vector<SymbolAndSignals> Results;
    for (const Entry &E : Symbols)
      if (Pattern.match(E.first))
        Results.push_back(E.second);
    return Results;
  }

private:
  using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
  std::vector<Entry> Symbols;
};

// Helpers for tokenize state machine.
enum TokenizeState {
  EMPTY,      // No pending characters.
  ONE_BIG,    // Read one uppercase letter, could be WORD or Word.
  BIG_WORD,   // Reading an uppercase WORD.
  SMALL_WORD, // Reading a lowercase word.
  NUMBER      // Reading a number.
};

enum CharType { UPPER, LOWER, DIGIT, MISC };
CharType classify(char c) {
  if (isupper(c))
    return UPPER;
  if (islower(c))
    return LOWER;
  if (isdigit(c))
    return DIGIT;
  return MISC;
}

} // namespace

std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
  std::vector<std::string> Result;
  // State describes the treatment of text from Start to I.
  // Once text is Flush()ed into Result, we're done with it and advance Start.
  TokenizeState State = EMPTY;
  size_t Start = 0;
  auto Flush = [&](size_t End) {
    if (State != EMPTY) {
      Result.push_back(Text.substr(Start, End - Start).lower());
      State = EMPTY;
    }
    Start = End;
  };
  for (size_t I = 0; I < Text.size(); ++I) {
    CharType Type = classify(Text[I]);
    if (Type == MISC)
      Flush(I);
    else if (Type == LOWER)
      switch (State) {
      case BIG_WORD:
        Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
        LLVM_FALLTHROUGH;
      case ONE_BIG:
        State = SMALL_WORD;
        LLVM_FALLTHROUGH;
      case SMALL_WORD:
        break;
      default:
        Flush(I);
        State = SMALL_WORD;
      }
    else if (Type == UPPER)
      switch (State) {
      case ONE_BIG:
        State = BIG_WORD;
        LLVM_FALLTHROUGH;
      case BIG_WORD:
        break;
      default:
        Flush(I);
        State = ONE_BIG;
      }
    else if (Type == DIGIT && State != NUMBER) {
      Flush(I);
      State = NUMBER;
    }
  }
  Flush(Text.size());
  return Result;
}

std::string
FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
  std::string Result;
  for (size_t I = 0; I < Tokens.size(); ++I) {
    if (I)
      Result.append("[[:alnum:]]* ");
    for (size_t J = 0; J < Tokens[I].size(); ++J) {
      if (J)
        Result.append("([[:alnum:]]* )?");
      Result.push_back(Tokens[I][J]);
    }
  }
  return Result;
}

llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
  auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
  if (!Buffer)
    return llvm::errorCodeToError(Buffer.getError());
  return std::make_unique<MemSymbolIndex>(
      find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
}

} // namespace include_fixer
} // namespace clang