1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
| //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
// - leaf nodes correspond to a token in the expanded token stream,
// - tree nodes correspond to language grammar constructs.
//
// The tree is initially built from an AST. Each node of a newly built tree
// covers a continous subrange of expanded tokens (i.e. tokens after
// preprocessing), the specific tokens coverered are stored in the leaf nodes of
// a tree. A post-order traversal of a tree will visit leaf nodes in an order
// corresponding the original order of expanded tokens.
//
// This is still work in progress and highly experimental, we leave room for
// ourselves to completely change the design and/or implementation.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>
namespace clang {
namespace syntax {
/// A memory arena for syntax trees. Also tracks the underlying token buffers,
/// source manager, etc.
class Arena {
public:
Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
TokenBuffer Tokens);
const SourceManager &sourceManager() const { return SourceMgr; }
const LangOptions &langOptions() const { return LangOpts; }
const TokenBuffer &tokenBuffer() const;
llvm::BumpPtrAllocator &allocator() { return Allocator; }
/// Add \p Buffer to the underlying source manager, tokenize it and store the
/// resulting tokens. Useful when there is a need to materialize tokens that
/// were not written in user code.
std::pair<FileID, llvm::ArrayRef<syntax::Token>>
lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
private:
SourceManager &SourceMgr;
const LangOptions &LangOpts;
TokenBuffer Tokens;
/// IDs and storage for additional tokenized files.
llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
/// Keeps all the allocated nodes and their intermediate data structures.
llvm::BumpPtrAllocator Allocator;
};
class Tree;
class TreeBuilder;
enum class NodeKind : uint16_t;
enum class NodeRole : uint8_t;
/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
/// a Tree (representing language constructrs).
class Node {
public:
/// Newly created nodes are detached from a tree, parent and sibling links are
/// set when the node is added as a child to another one.
Node(NodeKind Kind);
NodeKind kind() const { return static_cast<NodeKind>(Kind); }
NodeRole role() const { return static_cast<NodeRole>(Role); }
const Tree *parent() const { return Parent; }
Tree *parent() { return Parent; }
const Node *nextSibling() const { return NextSibling; }
Node *nextSibling() { return NextSibling; }
/// Dumps the structure of a subtree. For debugging and testing purposes.
std::string dump(const Arena &A) const;
/// Dumps the tokens forming this subtree.
std::string dumpTokens(const Arena &A) const;
private:
// Tree is allowed to change the Parent link and Role.
friend class Tree;
Tree *Parent;
Node *NextSibling;
unsigned Kind : 16;
unsigned Role : 8;
};
/// A leaf node points to a single token inside the expanded token stream.
class Leaf final : public Node {
public:
Leaf(const syntax::Token *T);
static bool classof(const Node *N);
const syntax::Token *token() const { return Tok; }
private:
const syntax::Token *Tok;
};
/// A node that has children and represents a syntactic language construct.
class Tree : public Node {
public:
using Node::Node;
static bool classof(const Node *N);
Node *firstChild() { return FirstChild; }
const Node *firstChild() const { return FirstChild; }
protected:
/// Find the first node with a corresponding role.
syntax::Node *findChild(NodeRole R);
private:
/// Prepend \p Child to the list of children and and sets the parent pointer.
/// A very low-level operation that does not check any invariants, only used
/// by TreeBuilder.
/// EXPECTS: Role != NodeRoleDetached.
void prependChildLowLevel(Node *Child, NodeRole Role);
friend class TreeBuilder;
Node *FirstChild = nullptr;
};
} // namespace syntax
} // namespace clang
#endif
|