reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
//===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Some operations such as code completion produce a set of candidates.
/// Usually the user can choose between them, but we should put the best options
/// at the top (they're easier to select, and more likely to be seen).
///
/// This file defines building blocks for ranking candidates.
/// It's used by the features directly and also in the implementation of
/// indexes, as indexes also need to heuristically limit their results.
///
/// The facilities here are:
///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
///     These are structured in a way that they can be debugged, and are fairly
///     consistent regardless of the source.
///   - computing scores from scoring signals. These are suitable for sorting.
///   - sorting utilities like the TopN container.
/// These could be split up further to isolate dependencies if we care.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H

#include "ExpectedTypes.h"
#include "FileDistance.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include <algorithm>
#include <functional>
#include <vector>

namespace llvm {
class raw_ostream;
} // namespace llvm

namespace clang {
class CodeCompletionResult;

namespace clangd {

struct Symbol;
class URIDistance;

// Signals structs are designed to be aggregated from 0 or more sources.
// A default instance has neutral signals, and sources are merged into it.
// They can be dumped for debugging, and evaluate()d into a score.

/// Attributes of a symbol that affect how much we like it.
///
/// Everything here describes the symbol alone; signals that also depend on the
/// query live in SymbolRelevanceSignals. Every field has an in-class default,
/// so a default-constructed instance is the neutral value that sources are
/// merge()d into.
struct SymbolQualitySignals {
  bool Deprecated = false;
  bool ReservedName = false; // __foo, _Foo are usually implementation details.
                             // FIXME: make these findable once user types _.
  bool ImplementationDetail = false;
  // NOTE(review): presumably the number of known references to the symbol
  // (e.g. as recorded by an index) — confirm against merge().
  unsigned References = 0;

  // Coarse classification of the symbol. Defaults to Unknown.
  enum SymbolCategory {
    Unknown = 0,
    Variable,
    Macro,
    Type,
    Function,
    Constructor,
    Destructor,
    Namespace,
    Keyword,
    Operator,
  } Category = Unknown;

  // Fold the signals available from a Sema code-completion result into this.
  void merge(const CodeCompletionResult &SemaCCResult);
  // Fold the signals available from an index symbol into this.
  void merge(const Symbol &IndexResult);

  // Condense these signals down to a single number, higher is better.
  float evaluate() const;
};
// Writes the signals to the stream; signals structs are meant to be dumpable
// for debugging.
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                              const SymbolQualitySignals &);

/// Attributes of a symbol-query pair that affect how much we like it.
///
/// Every field has an in-class default, but Name and NameMatch are documented
/// below as requiring explicit assignment by the caller. The pointer members
/// (ContextWords, FileProximityMatch, ScopeProximityMatch) default to null.
/// NOTE(review): those pointers look non-owning, in which case the pointees
/// must outlive this struct — confirm with the code that populates them.
struct SymbolRelevanceSignals {
  /// The name of the symbol (for ContextWords). Must be explicitly assigned.
  llvm::StringRef Name;
  /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
  float NameMatch = 1;
  /// Lowercase words relevant to the context (e.g. near the completion point).
  /// Null by default.
  llvm::StringSet<>* ContextWords = nullptr;
  bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
  /// Whether fixits needs to be applied for that completion or not.
  bool NeedsFixIts = false;
  bool InBaseClass = false; // A member from base class of the accessed class.

  // Null by default.
  // NOTE(review): presumably file proximity is only scored when this is set,
  // mirroring ScopeProximityMatch below — confirm in evaluate().
  URIDistance *FileProximityMatch = nullptr;
  /// These are used to calculate proximity between the index symbol and the
  /// query.
  llvm::StringRef SymbolURI;
  /// FIXME: unify with index proximity score - signals should be
  /// source-independent.
  /// Proximity between best declaration and the query. [0-1], 1 is closest.
  float SemaFileProximityScore = 0;

  // Scope proximity is only considered (both index and sema) when this is set.
  ScopeDistance *ScopeProximityMatch = nullptr;
  // Optional, so "no scope known" is distinguishable from an empty scope
  // string.
  llvm::Optional<llvm::StringRef> SymbolScope;
  // A symbol from sema should be accessible from the current scope.
  bool SemaSaysInScope = false;

  // An approximate measure of where we expect the symbol to be used.
  // Defaults to GlobalScope.
  enum AccessibleScope {
    FunctionScope,
    ClassScope,
    FileScope,
    GlobalScope,
  } Scope = GlobalScope;

  // The kind of query these signals are evaluated for. Defaults to Generic.
  enum QueryType {
    CodeComplete,
    Generic,
  } Query = Generic;

  // Clang's code-completion context kind. Defaults to CCC_Other.
  CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;

  // Whether symbol is an instance member of a class.
  bool IsInstanceMember = false;

  // Whether clang provided a preferred type in the completion context.
  bool HadContextType = false;
  // Whether a source completion item or a symbol had a type information.
  bool HadSymbolType = false;
  // Whether the item matches the type expected in the completion context.
  bool TypeMatchesPreferred = false;

  // Fold the signals available from a Sema code-completion result into this.
  void merge(const CodeCompletionResult &SemaResult);
  // Fold the signals available from an index symbol into this.
  void merge(const Symbol &IndexResult);

  // Condense these signals down to a single number, higher is better.
  float evaluate() const;
};
// Writes the signals to the stream; signals structs are meant to be dumpable
// for debugging.
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                              const SymbolRelevanceSignals &);

/// Combine symbol quality and relevance into a single score.
/// NOTE(review): the arguments are presumably the values returned by
/// SymbolQualitySignals::evaluate() and SymbolRelevanceSignals::evaluate(),
/// with higher meaning better as for those functions — confirm at the
/// definition.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);

/// TopN<T> is a lossy container that preserves only the "best" N elements.
template <typename T, typename Compare = std::greater<T>> class TopN {
public:
  using value_type = T;
  TopN(size_t N, Compare Greater = Compare())
      : N(N), Greater(std::move(Greater)) {}

  // Adds a candidate to the set.
  // Returns true if a candidate was dropped to get back under N.
  bool push(value_type &&V) {
    bool Dropped = false;
    if (Heap.size() >= N) {
      Dropped = true;
      if (N > 0 && Greater(V, Heap.front())) {
        std::pop_heap(Heap.begin(), Heap.end(), Greater);
        Heap.back() = std::move(V);
        std::push_heap(Heap.begin(), Heap.end(), Greater);
      }
    } else {
      Heap.push_back(std::move(V));
      std::push_heap(Heap.begin(), Heap.end(), Greater);
    }
    assert(Heap.size() <= N);
    assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
    return Dropped;
  }

  // Returns candidates from best to worst.
  std::vector<value_type> items() && {
    std::sort_heap(Heap.begin(), Heap.end(), Greater);
    assert(Heap.size() <= N);
    return std::move(Heap);
  }

private:
  const size_t N;
  std::vector<value_type> Heap; // Min-heap, comparator is Greater.
  Compare Greater;
};

/// Returns a string that sorts in the same order as (-Score, Tiebreak), for
/// LSP. (The highest score compares smallest so it sorts at the top).
/// \p Tiebreak is optional and defaults to the empty string.
std::string sortText(float Score, llvm::StringRef Tiebreak = "");

/// Attributes of an overload candidate (Kind is clang's
/// CodeCompleteConsumer::OverloadCandidate::CandidateKind).
/// Unlike SymbolQualitySignals and SymbolRelevanceSignals, this declares no
/// merge() or evaluate(): it is a plain set of values plus a stream operator.
/// NOTE(review): presumably used to order signature-help results — confirm
/// against the callers.
struct SignatureQualitySignals {
  uint32_t NumberOfParameters = 0;
  uint32_t NumberOfOptionalParameters = 0;
  // Kind of the candidate. Defaults to CK_Function.
  CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
      CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
};
// Writes the signals to the stream, like the operators for the other signals
// structs in this file.
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                              const SignatureQualitySignals &);

} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H