reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This library measures the distance between file paths.
// It's used for ranking symbols, e.g. in code completion.
//  |foo/bar.h -> foo/bar.h| = 0.
//  |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
// This is an edit-distance, where edits go up or down the directory tree.
// It's not symmetrical, the costs of going up and down may not match.
//
// Dealing with multiple sources:
// In practice we care about the distance from a source file, but files near
// its main-header and #included files are considered "close".
// So we start with a set of (anchor, cost) pairs, and call the distance to a
// path the minimum of `cost + |source -> path|`.
//
// We allow each source to limit the number of up-traversals paths may start
// with. Up-traversals may reach things that are not "semantically near".
//
// Symbol URI schemes:
// Symbol locations may be represented by URIs rather than file paths directly.
// In this case we want to perform distance computations in URI space rather
// than in file-space, without performing redundant conversions.
// Therefore we have a lookup structure that accepts URIs, so that intermediate
// calculations for the same scheme can be reused.
//
// Caveats:
// Assuming up and down traversals each have uniform costs is simplistic.
// Often there are "semantic roots" whose children are almost unrelated.
// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H

#include "URI.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <memory>

namespace clang {
namespace clangd {

struct FileDistanceOptions {
  unsigned UpCost = 2;                    // |foo/bar.h -> foo|
  unsigned DownCost = 1;                  // |foo -> foo/bar.h|
  unsigned IncludeCost = 2;               // |foo.cc -> included_header.h|
  bool AllowDownTraversalFromRoot = true; // | / -> /a |
};

struct SourceParams {
  // Base cost for paths starting at this source.
  unsigned Cost = 0;
  // Limits the number of upwards traversals allowed from this source.
  unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
};

// Supports lookups to find the minimum distance to a file from any source.
// This object should be reused, it memoizes intermediate computations.
class FileDistance {
public:
  static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max();
  static const llvm::hash_code RootHash;

  FileDistance(llvm::StringMap<SourceParams> Sources,
               const FileDistanceOptions &Opts = {});

  // Computes the minimum distance from any source to the file path.
  unsigned distance(llvm::StringRef Path);

private:
  // Costs computed so far. Always contains sources and their ancestors.
  // We store hash codes only. Collisions are rare and consequences aren't dire.
  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
  FileDistanceOptions Opts;
};

// Supports lookups like FileDistance, but the lookup keys are URIs.
// We convert each of the sources to the scheme of the URI and do a FileDistance
// comparison on the bodies.
class URIDistance {
public:
  // \p Sources must contain absolute paths, not URIs.
  URIDistance(llvm::StringMap<SourceParams> Sources,
              const FileDistanceOptions &Opts = {})
      : Sources(Sources), Opts(Opts) {}

  // Computes the minimum distance from any source to the URI.
  // Only sources that can be mapped into the URI's scheme are considered.
  unsigned distance(llvm::StringRef URI);

private:
  // Returns the FileDistance for a URI scheme, creating it if needed.
  FileDistance &forScheme(llvm::StringRef Scheme);

  // We cache the results using the original strings so we can skip URI parsing.
  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
  llvm::StringMap<SourceParams> Sources;
  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
  FileDistanceOptions Opts;
};

/// Support lookups like FileDistance, but the lookup keys are symbol scopes.
/// For example, a scope "na::nb::" is converted to "/na/nb".
class ScopeDistance {
public:
  /// QueryScopes[0] is the preferred scope.
  ScopeDistance(llvm::ArrayRef<std::string> QueryScopes);

  unsigned distance(llvm::StringRef SymbolScope);

private:
  FileDistance Distance;
};

} // namespace clangd
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H