mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 17:17:45 +00:00
Set up a module structure (#44)
This commit is contained in:
parent
baa0a9f2f0
commit
16f7a3bd80
38 changed files with 2291 additions and 454 deletions
20
.github/workflows/main.yml
vendored
20
.github/workflows/main.yml
vendored
|
@ -31,8 +31,19 @@ jobs:
|
||||||
- uses: hecrj/setup-rust-action@v1
|
- uses: hecrj/setup-rust-action@v1
|
||||||
with:
|
with:
|
||||||
rust-version: ${{ matrix.rust }}
|
rust-version: ${{ matrix.rust }}
|
||||||
- run: cargo test --verbose --all-features
|
|
||||||
- run: cargo test --release --verbose --all-features
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: test
|
||||||
|
args: --all-targets --verbose
|
||||||
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: test
|
||||||
|
args: --all-targets --verbose --all-features
|
||||||
|
- uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: test
|
||||||
|
args: --all-targets --verbose --all-features --release
|
||||||
|
|
||||||
check:
|
check:
|
||||||
name: Check
|
name: Check
|
||||||
|
@ -48,6 +59,7 @@ jobs:
|
||||||
- uses: actions-rs/cargo@v1
|
- uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
command: check
|
command: check
|
||||||
|
args: --all-targets --all-features
|
||||||
|
|
||||||
clippy:
|
clippy:
|
||||||
name: Clippy
|
name: Clippy
|
||||||
|
@ -79,11 +91,13 @@ jobs:
|
||||||
name: Check doc links
|
name: Check doc links
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
RUSTDOCFLAGS: -Dwarnings
|
RUSTDOCFLAGS: -Dwarnings --cfg doc_cfg
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- uses: hecrj/setup-rust-action@v1
|
- uses: hecrj/setup-rust-action@v1
|
||||||
|
with:
|
||||||
|
rust-version: nightly
|
||||||
- run: cargo doc --all-features --document-private-items --no-deps
|
- run: cargo doc --all-features --document-private-items --no-deps
|
||||||
|
|
||||||
miri-test:
|
miri-test:
|
||||||
|
|
45
CHANGELOG.md
45
CHANGELOG.md
|
@ -2,7 +2,52 @@
|
||||||
|
|
||||||
## `v0.12.0`
|
## `v0.12.0`
|
||||||
|
|
||||||
|
* Documentation has been improved in most areas, together with a switch to a more principled module structure that allows explicitly documenting submodules.
|
||||||
|
* The `interning` module has been rewritten. It now provides fuctions for obtaining a default interner (`new_interner` and `new_threaded_interner`) and provides a small, dependency-free interner implementation.
|
||||||
|
* Compatibility with other interners can be enable via feature flags.
|
||||||
|
* **Note** that compatibilty with `lasso` is not enabled by default. Use the `lasso_compat` feature to match the previous default.
|
||||||
* Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens).
|
* Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens).
|
||||||
* Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens.
|
* Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens.
|
||||||
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
||||||
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
||||||
|
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
|
||||||
|
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
|
||||||
|
* `cstree`
|
||||||
|
* `Language`
|
||||||
|
* `RawSyntaxKind`
|
||||||
|
* `build`
|
||||||
|
* `GreenNodeBuilder`
|
||||||
|
* `NodeCache`
|
||||||
|
* `Checkpoint`
|
||||||
|
* `green`
|
||||||
|
* `GreenNode`
|
||||||
|
* `GreenToken`
|
||||||
|
* `GreenNodeChildren`
|
||||||
|
* `syntax`
|
||||||
|
* `{Syntax,Resolved}Node`
|
||||||
|
* `{Syntax,Resolved}Token`
|
||||||
|
* `{Syntax,Resolved}Element`
|
||||||
|
* `{Syntax,Resolved}ElementRef`
|
||||||
|
* `SyntaxNodeChildren`
|
||||||
|
* `SyntaxElementChildren`
|
||||||
|
* `SyntaxText`
|
||||||
|
* `interning`
|
||||||
|
* `TokenKey` and the `InternKey` trait
|
||||||
|
* `Interner` and `Resolver` traits
|
||||||
|
* `new_interner` and `TokenInterner`
|
||||||
|
* `new_threaded_interner` and `MultiThreadedTokenInterner` (with the `multi_threaded_interning` feature enabled)
|
||||||
|
* compatibility implementations for interning crates depending on selected feature flags
|
||||||
|
* `text`
|
||||||
|
* `TextSize`
|
||||||
|
* `TextRange`
|
||||||
|
* `SyntaxText` (re-export)
|
||||||
|
* `traversal`
|
||||||
|
* `Direction`
|
||||||
|
* `WalkEvent`
|
||||||
|
* `util`
|
||||||
|
* `NodeOrToken`
|
||||||
|
* `TokenAtOffset`
|
||||||
|
* `sync`
|
||||||
|
* `Arc`
|
||||||
|
* `prelude`
|
||||||
|
* re-exports of the most-used items
|
38
Cargo.toml
38
Cargo.toml
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
name = "cstree"
|
name = "cstree"
|
||||||
version = "0.11.1"
|
version = "0.12.0-rc.0" # when updating, also update `#![doc(html_root_url)]`
|
||||||
authors = [
|
authors = [
|
||||||
"Domenic Quirl <DomenicQuirl@pm.me>",
|
"Domenic Quirl <DomenicQuirl@pm.me>",
|
||||||
"Aleksey Kladov <aleksey.kladov@gmail.com>",
|
"Aleksey Kladov <aleksey.kladov@gmail.com>",
|
||||||
|
@ -15,7 +15,6 @@ readme = "README.md"
|
||||||
debug = true
|
debug = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
lasso = { version = "0.6", features = ["inline-more", "multi-threaded"] }
|
|
||||||
text-size = "1.1.0"
|
text-size = "1.1.0"
|
||||||
fxhash = "0.2.1"
|
fxhash = "0.2.1"
|
||||||
parking_lot = "0.11.2"
|
parking_lot = "0.11.2"
|
||||||
|
@ -24,6 +23,20 @@ parking_lot = "0.11.2"
|
||||||
triomphe = "0.1.7"
|
triomphe = "0.1.7"
|
||||||
sptr = "0.3.2"
|
sptr = "0.3.2"
|
||||||
|
|
||||||
|
# Default Interner
|
||||||
|
indexmap = "1.9"
|
||||||
|
|
||||||
|
[dependencies.lasso]
|
||||||
|
version = "0.6"
|
||||||
|
features = ["inline-more"]
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[dependencies.salsa]
|
||||||
|
git = "https://github.com/salsa-rs/salsa/"
|
||||||
|
version = "0.1"
|
||||||
|
optional = true
|
||||||
|
package = "salsa-2022"
|
||||||
|
|
||||||
[dependencies.serde]
|
[dependencies.serde]
|
||||||
version = "1.0"
|
version = "1.0"
|
||||||
optional = true
|
optional = true
|
||||||
|
@ -43,7 +56,24 @@ harness = false
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
serialize = ["serde", "lasso/serialize"]
|
# Implementations of `serde::{De,}Serialize` for CSTrees.
|
||||||
|
serialize = ["serde", "lasso?/serialize"]
|
||||||
|
# Interoperability with the `lasso` interning crate.
|
||||||
|
# When enabled, `cstree`'s default interners will use `lasso` internally, too.
|
||||||
|
lasso_compat = ["lasso"]
|
||||||
|
# Additionally provide threadsafe interner types.
|
||||||
|
# Where applicable (and if the corresponding features are selected), provide compatibility
|
||||||
|
# implementations for multi-thread interners from other crates.
|
||||||
|
multi_threaded_interning = ["lasso_compat", "lasso/multi-threaded"]
|
||||||
|
# Interoperability with the `salsa` framework for incremental computation.
|
||||||
|
# Use this feature for "Salsa 2022".
|
||||||
|
# WARNING: This feature is considered unstable!
|
||||||
|
salsa_2022_compat = ["salsa"]
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "salsa"
|
||||||
|
required-features = ["salsa_2022_compat"]
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
[package.metadata.docs.rs]
|
||||||
features = ["serialize"]
|
all-features = true
|
||||||
|
rustdoc-args = ["--cfg", "doc_cfg"]
|
||||||
|
|
287
README.md
287
README.md
|
@ -32,8 +32,291 @@ Notable differences of `cstree` compared to `rowan`:
|
||||||
- Performance optimizations for tree traversal: persisting red nodes allows tree traversal methods to return references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
- Performance optimizations for tree traversal: persisting red nodes allows tree traversal methods to return references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
The main entry points for constructing syntax trees are `GreenNodeBuilder` and `SyntaxNode::new_root` for green and red trees respectively.
|
|
||||||
See `examples/s_expressions` for a guided tutorial to `cstree`.
|
If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
|
||||||
|
concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
|
||||||
|
to happen to go from input text to a `cstree` syntax tree:
|
||||||
|
|
||||||
|
1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression")
|
||||||
|
that you want to have in your syntax and implement `Language`
|
||||||
|
|
||||||
|
2. Create a `GreenNodeBuilder` and call `start_node`, `token` and `finish_node` from your parser
|
||||||
|
|
||||||
|
3. Call `SyntaxNode::new_root` or `SyntaxNode::new_root_with_resolver` with the resulting
|
||||||
|
`GreenNode` to obtain a syntax tree that you can traverse
|
||||||
|
|
||||||
|
Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
|
||||||
|
We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
|
||||||
|
compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
|
||||||
|
|
||||||
|
### Defining the language
|
||||||
|
First, we need to list the different part of our language's grammar.
|
||||||
|
We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||||
|
The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
||||||
|
representation.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
#[repr(u16)]
|
||||||
|
enum SyntaxKind {
|
||||||
|
/* Tokens */
|
||||||
|
Int, // 42
|
||||||
|
Plus, // +
|
||||||
|
Minus, // -
|
||||||
|
LParen, // (
|
||||||
|
RParen, // )
|
||||||
|
/* Nodes */
|
||||||
|
Expr,
|
||||||
|
Root,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
|
||||||
|
We only really need one type of node; expressions.
|
||||||
|
Our syntax tree's root node will have the special kind `Root`, all other nodes will be
|
||||||
|
expressions containing a sequence of arithmetic operations potentially involving further, nested
|
||||||
|
expression nodes.
|
||||||
|
|
||||||
|
To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
|
||||||
|
`#[repr(u16)]` that we added) by implementing the `Language` trait. We can also tell `cstree` about tokens that
|
||||||
|
always have the same text through the `static_text` method on the trait. This is useful for the operators and
|
||||||
|
parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
|
||||||
|
other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Calculator;
|
||||||
|
|
||||||
|
impl Language for Calculator {
|
||||||
|
// The tokens and nodes we just defined
|
||||||
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
|
// This just needs to be the inverse of `kind_to_raw`, but could also
|
||||||
|
// be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||||
|
match raw.0 {
|
||||||
|
0 => SyntaxKind::Int,
|
||||||
|
1 => SyntaxKind::Plus,
|
||||||
|
2 => SyntaxKind::Minus,
|
||||||
|
3 => SyntaxKind::LParen,
|
||||||
|
4 => SyntaxKind::RParen,
|
||||||
|
5 => SyntaxKind::Expr,
|
||||||
|
6 => SyntaxKind::Root,
|
||||||
|
n => panic!("Unknown raw syntax kind: {n}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
|
RawSyntaxKind(kind as u16)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
match kind {
|
||||||
|
SyntaxKind::Plus => Some("+"),
|
||||||
|
SyntaxKind::Minus => Some("-"),
|
||||||
|
SyntaxKind::LParen => Some("("),
|
||||||
|
SyntaxKind::RParen => Some(")"),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Parsing into a green tree
|
||||||
|
With that out of the way, we can start writing the parser for our expressions.
|
||||||
|
For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
|
||||||
|
tokens:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
pub enum Token<'input> {
|
||||||
|
// Note that number strings are not yet parsed into actual numbers,
|
||||||
|
// we just remember the slice of the input that contains their digits
|
||||||
|
Int(&'input str),
|
||||||
|
Plus,
|
||||||
|
Minus,
|
||||||
|
LParen,
|
||||||
|
RParen,
|
||||||
|
// A special token that indicates that we have reached the end of the file
|
||||||
|
EoF,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
|
||||||
|
combination of `cstree` and the actual parser, which we define like this:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub struct Parser<'input> {
|
||||||
|
// `Peekable` is a standard library iterator adapter that allows
|
||||||
|
// looking ahead at the next item without removing it from the iterator yet
|
||||||
|
lexer: Peekable<Lexer<'input>>,
|
||||||
|
builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'input> Parser<'input> {
|
||||||
|
pub fn new(input: &'input str) -> Self {
|
||||||
|
Self {
|
||||||
|
// we get `peekable` from implementing `Iterator` on `Lexer`
|
||||||
|
lexer: Lexer::new(input).peekable(),
|
||||||
|
builder: GreenNodeBuilder::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||||
|
self.lexer.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes for
|
||||||
|
all element in the language grammar will have the same type: `GreenNode` for the inner ("green")
|
||||||
|
tree and `SyntaxNode` for the outer ("red") tree. Different kinds of nodes (and tokens) are
|
||||||
|
differentiated by their `SyntaxKind` tag, which we defined above.
|
||||||
|
|
||||||
|
You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
|
||||||
|
a typical recursive descent parser. With a more traditional AST, one would define different AST
|
||||||
|
structs for struct or function definitions, statements, expressions and so on. Inside the
|
||||||
|
parser, the components of any element, such as all fields of a struct or all statements inside a
|
||||||
|
function, are parsed first and then the parser wraps them in the matching AST type, which is
|
||||||
|
returned from the corresponding parser function.
|
||||||
|
|
||||||
|
Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the parser
|
||||||
|
would build. Instead, parsing into a CST using the `GreenNodeBuilder` follows the source code more
|
||||||
|
closely in that you tell `cstree` about each new element you enter and all tokens that the parser
|
||||||
|
consumes. So, for example, to parse a struct definition the parser first "enters" the struct
|
||||||
|
definition node, then parses the `struct` keyword and type name, then parses each field, and finally
|
||||||
|
"finishes" parsing the struct node.
|
||||||
|
|
||||||
|
The most trivial example is the root node for our parser, which just creates a root node
|
||||||
|
containing the whole expression (we could do without a specific root node if any expression was
|
||||||
|
a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub fn parse(&mut self) -> Result<(), String> {
|
||||||
|
self.builder.start_node(SyntaxKind::Root);
|
||||||
|
self.parse_expr()?;
|
||||||
|
self.builder.finish_node();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
As there isn't a static AST type to return, the parser is very flexible as to what is part of a
|
||||||
|
node. In the previous example, if the user is adding a new field to the struct and has not yet
|
||||||
|
typed the field's type, the CST node for the struct doesn't care if there is no child node for
|
||||||
|
it. Similarly, if the user is deleting fields and the source code currently contains a leftover
|
||||||
|
field name, this additional identifier can be a part of the struct node without any
|
||||||
|
modifications to the syntax tree definition. This property is the key to why CSTs are such a
|
||||||
|
good fit as a lossless input representation, which necessitates the syntax tree to mirror the
|
||||||
|
user-specific layout of whitespace and comments around the AST items.
|
||||||
|
|
||||||
|
In the parser for our simple expression language, we'll also have to deal with the fact that,
|
||||||
|
when we see a number the parser doesn't yet know whether there will be additional operations
|
||||||
|
following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
|
||||||
|
a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
|
||||||
|
however, implies that when reaching the `+`, the parser would have to have already entered an
|
||||||
|
expression node in order for the whole input to be part of the expression.
|
||||||
|
|
||||||
|
To get around this, `GreenNodeBuilder` provides the `checkpoint` method, which we can call to
|
||||||
|
"remember" the current position in the input. For example, we can create a checkpoint before the
|
||||||
|
parser parses the first `1`. Later, when it sees the following `+`, it can create an `Expr` node
|
||||||
|
for the whole expression using `start_node_at`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
fn parse_lhs(&mut self) -> Result<(), String> {
|
||||||
|
// An expression may start either with a number, or with an opening parenthesis that is
|
||||||
|
// the start of a parenthesized expression
|
||||||
|
let next_token = *self.lexer.peek().unwrap();
|
||||||
|
match next_token {
|
||||||
|
Token::Int(n) => {
|
||||||
|
self.bump();
|
||||||
|
self.builder.token(SyntaxKind::Int, n);
|
||||||
|
}
|
||||||
|
Token::LParen => {
|
||||||
|
// Wrap the grouped expression inside a node containing it and its parentheses
|
||||||
|
self.builder.start_node(SyntaxKind::Expr);
|
||||||
|
self.bump();
|
||||||
|
self.builder.static_token(SyntaxKind::LParen);
|
||||||
|
self.parse_expr()?; // Inner expression
|
||||||
|
if self.bump() != Some(Token::RParen) {
|
||||||
|
return Err("Missing ')'".to_string());
|
||||||
|
}
|
||||||
|
self.builder.static_token(SyntaxKind::RParen);
|
||||||
|
self.builder.finish_node();
|
||||||
|
}
|
||||||
|
Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||||
|
t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_expr(&mut self) -> Result<(), String> {
|
||||||
|
// Remember our current position
|
||||||
|
let before_expr = self.builder.checkpoint();
|
||||||
|
|
||||||
|
// Parse the start of the expression
|
||||||
|
self.parse_lhs()?;
|
||||||
|
|
||||||
|
// Check if the expression continues with `+ <more>` or `- <more>`
|
||||||
|
let Some(next_token) = self.lexer.peek() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let op = match *next_token {
|
||||||
|
Token::Plus => SyntaxKind::Plus,
|
||||||
|
Token::Minus => SyntaxKind::Minus,
|
||||||
|
Token::RParen | Token::EoF => return Ok(()),
|
||||||
|
t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||||
|
};
|
||||||
|
|
||||||
|
// If so, retroactively wrap the (already parsed) LHS and the following RHS
|
||||||
|
// inside an `Expr` node
|
||||||
|
self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||||
|
self.bump();
|
||||||
|
self.builder.static_token(op);
|
||||||
|
self.parse_expr()?; // RHS
|
||||||
|
self.builder.finish_node();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Obtaining the parser result
|
||||||
|
|
||||||
|
Our parser is now capable of parsing our little arithmetic language, but it's methods don't return
|
||||||
|
anything. So how do we get our syntax tree out? The answer lies in `GreenNodeBuilder::finish`, which
|
||||||
|
finally returns the tree that we have painstakingly constructed.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
impl Parser<'_> {
|
||||||
|
pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||||
|
assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||||
|
let (tree, cache) = self.builder.finish();
|
||||||
|
(tree, cache.unwrap().into_interner().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use it
|
||||||
|
for parsing related inputs (e.g., different source files from the same crate may share a lot of
|
||||||
|
common function and type names that can be deduplicated). See `GreenNodeBuilder`'s documentation for
|
||||||
|
more information on this, in particular the `with_cache` and `from_cache` methods. Most importantly
|
||||||
|
for us, we can extract the `Interner` that contains the source text of the tree's tokens from the
|
||||||
|
cache, which we need if we want to look up things like variable names or the value of numbers for
|
||||||
|
our calculator.
|
||||||
|
|
||||||
|
To work with the syntax tree, you'll want to upgrade it to a `SyntaxNode` using
|
||||||
|
`SyntaxNode::new_root`. You can also use `SyntaxNode::new_root_with_resolver` to combine tree and
|
||||||
|
interner, which lets you directly retrieve source text and makes the nodes implement `Display` and
|
||||||
|
`Debug`. The same output can be produced from `SyntaxNode`s by calling the `debug` or `display`
|
||||||
|
method with a `Resolver`. To visualize the whole syntax tree, pass `true` for the `recursive`
|
||||||
|
parameter on `debug`, or simply debug-print a `ResolvedNode`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
let input = "11 + 2-(5 + 4)";
|
||||||
|
let mut parser = Parser::new(input);
|
||||||
|
parser.parse().unwrap();
|
||||||
|
let (tree, interner) = parser.finish();
|
||||||
|
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||||
|
dbg!(root);
|
||||||
|
```
|
||||||
|
|
||||||
## AST Layer
|
## AST Layer
|
||||||
While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or an AST representation, or freely switch between them.
|
While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or an AST representation, or freely switch between them.
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||||
use cstree::*;
|
use cstree::{
|
||||||
use lasso::{Interner, Rodeo};
|
build::*,
|
||||||
|
green::GreenNode,
|
||||||
|
interning::{new_interner, Interner},
|
||||||
|
Language, RawSyntaxKind,
|
||||||
|
};
|
||||||
use std::{fmt, hash::Hash};
|
use std::{fmt, hash::Hash};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -40,7 +44,7 @@ impl Bool for UseStaticText {
|
||||||
impl<T: Bool> Language for TestLang<T> {
|
impl<T: Bool> Language for TestLang<T> {
|
||||||
type Kind = TestKind;
|
type Kind = TestKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
if raw.0 == u16::MAX - 1 {
|
if raw.0 == u16::MAX - 1 {
|
||||||
TestKind::Plus
|
TestKind::Plus
|
||||||
} else {
|
} else {
|
||||||
|
@ -48,10 +52,10 @@ impl<T: Bool> Language for TestLang<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
match kind {
|
match kind {
|
||||||
TestKind::Element { n } => SyntaxKind(n),
|
TestKind::Element { n } => RawSyntaxKind(n),
|
||||||
TestKind::Plus => SyntaxKind(u16::MAX - 1),
|
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,7 +71,7 @@ impl<T: Bool> Language for TestLang<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_tree_with_cache<'c, 'i, T: Bool, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
|
pub fn build_tree_with_cache<T: Bool, I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
|
||||||
where
|
where
|
||||||
I: Interner,
|
I: Interner,
|
||||||
{
|
{
|
||||||
|
@ -78,9 +82,9 @@ where
|
||||||
node
|
node
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_recursive<'c, 'i, T: Bool, I>(
|
pub fn build_recursive<T: Bool, I>(
|
||||||
root: &Element<'_>,
|
root: &Element<'_>,
|
||||||
builder: &mut GreenNodeBuilder<'c, 'i, TestLang<T>, I>,
|
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
|
||||||
mut from: u16,
|
mut from: u16,
|
||||||
) -> u16
|
) -> u16
|
||||||
where
|
where
|
||||||
|
@ -95,7 +99,7 @@ where
|
||||||
builder.finish_node();
|
builder.finish_node();
|
||||||
}
|
}
|
||||||
Element::Token(text) => {
|
Element::Token(text) => {
|
||||||
builder.token(TestKind::Element { n: from }, *text);
|
builder.token(TestKind::Element { n: from }, text);
|
||||||
}
|
}
|
||||||
Element::Plus => {
|
Element::Plus => {
|
||||||
builder.token(TestKind::Plus, "+");
|
builder.token(TestKind::Plus, "+");
|
||||||
|
@ -114,10 +118,15 @@ fn two_level_tree() -> Element<'static> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn create(c: &mut Criterion) {
|
pub fn create(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("two-level tree");
|
#[cfg(not(feature = "lasso_compat"))]
|
||||||
|
const GROUP_NAME: &str = "two-level tree (default interner)";
|
||||||
|
#[cfg(feature = "lasso_compat")]
|
||||||
|
const GROUP_NAME: &str = "two-level tree (lasso)";
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group(GROUP_NAME);
|
||||||
group.throughput(Throughput::Elements(1));
|
group.throughput(Throughput::Elements(1));
|
||||||
|
|
||||||
let mut interner = Rodeo::new();
|
let mut interner = new_interner();
|
||||||
let mut cache = NodeCache::with_interner(&mut interner);
|
let mut cache = NodeCache::with_interner(&mut interner);
|
||||||
let tree = two_level_tree();
|
let tree = two_level_tree();
|
||||||
|
|
||||||
|
|
|
@ -13,10 +13,7 @@
|
||||||
//! - "+" Token(Add)
|
//! - "+" Token(Add)
|
||||||
//! - "4" Token(Number)
|
//! - "4" Token(Number)
|
||||||
|
|
||||||
use cstree::{
|
use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
|
||||||
interning::{IntoResolver, Resolver},
|
|
||||||
GreenNodeBuilder, NodeOrToken,
|
|
||||||
};
|
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
@ -36,7 +33,7 @@ enum SyntaxKind {
|
||||||
}
|
}
|
||||||
use SyntaxKind::*;
|
use SyntaxKind::*;
|
||||||
|
|
||||||
impl From<SyntaxKind> for cstree::SyntaxKind {
|
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||||
fn from(kind: SyntaxKind) -> Self {
|
fn from(kind: SyntaxKind) -> Self {
|
||||||
Self(kind as u16)
|
Self(kind as u16)
|
||||||
}
|
}
|
||||||
|
@ -47,12 +44,12 @@ enum Lang {}
|
||||||
impl cstree::Language for Lang {
|
impl cstree::Language for Lang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= Root as u16);
|
assert!(raw.0 <= Root as u16);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
kind.into()
|
kind.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,12 +64,12 @@ impl cstree::Language for Lang {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type SyntaxNode = cstree::SyntaxNode<Lang>;
|
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
type SyntaxToken = cstree::SyntaxToken<Lang>;
|
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
|
type SyntaxElement = cstree::util::NodeOrToken<SyntaxNode, SyntaxToken>;
|
||||||
type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
|
type SyntaxElementRef<'a> = cstree::util::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
|
||||||
|
|
||||||
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
|
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
|
||||||
builder: GreenNodeBuilder<'static, 'static, Lang>,
|
builder: GreenNodeBuilder<'static, 'static, Lang>,
|
||||||
|
@ -128,10 +125,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
|
||||||
self.builder.finish_node();
|
self.builder.finish_node();
|
||||||
|
|
||||||
let (tree, cache) = self.builder.finish();
|
let (tree, cache) = self.builder.finish();
|
||||||
(
|
(SyntaxNode::new_root(tree), cache.unwrap().into_interner().unwrap())
|
||||||
SyntaxNode::new_root(tree),
|
|
||||||
cache.unwrap().into_interner().unwrap().into_resolver(),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
334
examples/readme.rs
Normal file
334
examples/readme.rs
Normal file
|
@ -0,0 +1,334 @@
|
||||||
|
use std::{io::Write, iter::Peekable};
|
||||||
|
|
||||||
|
use cstree::{
|
||||||
|
interning::Interner,
|
||||||
|
prelude::*,
|
||||||
|
syntax::{ResolvedElementRef, ResolvedNode},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
#[repr(u16)]
|
||||||
|
pub enum SyntaxKind {
|
||||||
|
/* Tokens */
|
||||||
|
Int, // 42
|
||||||
|
Plus, // +
|
||||||
|
Minus, // -
|
||||||
|
LParen, // (
|
||||||
|
RParen, // )
|
||||||
|
/* Nodes */
|
||||||
|
Expr,
|
||||||
|
Root,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Calculator;
|
||||||
|
impl Language for Calculator {
|
||||||
|
// The tokens and nodes we just defined
|
||||||
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
|
// This just needs to be the inverse of `kind_to_raw`, but could also
|
||||||
|
// be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||||
|
match raw.0 {
|
||||||
|
0 => SyntaxKind::Int,
|
||||||
|
1 => SyntaxKind::Plus,
|
||||||
|
2 => SyntaxKind::Minus,
|
||||||
|
3 => SyntaxKind::LParen,
|
||||||
|
4 => SyntaxKind::RParen,
|
||||||
|
5 => SyntaxKind::Expr,
|
||||||
|
6 => SyntaxKind::Root,
|
||||||
|
n => panic!("Unknown raw syntax kind: {n}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
|
RawSyntaxKind(kind as u16)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
match kind {
|
||||||
|
SyntaxKind::Plus => Some("+"),
|
||||||
|
SyntaxKind::Minus => Some("-"),
|
||||||
|
SyntaxKind::LParen => Some("("),
|
||||||
|
SyntaxKind::RParen => Some(")"),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
pub enum Token<'input> {
|
||||||
|
Int(&'input str),
|
||||||
|
Plus,
|
||||||
|
Minus,
|
||||||
|
LParen,
|
||||||
|
RParen,
|
||||||
|
EoF,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Lexer<'input> {
|
||||||
|
input: &'input str,
|
||||||
|
at_eof: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'input> Lexer<'input> {
|
||||||
|
pub fn new(input: &'input str) -> Self {
|
||||||
|
Self { input, at_eof: false }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_token(&mut self) -> Result<Token<'input>, String> {
|
||||||
|
loop {
|
||||||
|
let Some(next_char) = self.input.chars().next() else {
|
||||||
|
self.at_eof = true;
|
||||||
|
return Ok(Token::EoF);
|
||||||
|
};
|
||||||
|
|
||||||
|
let token = match next_char {
|
||||||
|
'+' => Token::Plus,
|
||||||
|
'-' => Token::Minus,
|
||||||
|
'(' => Token::LParen,
|
||||||
|
')' => Token::RParen,
|
||||||
|
c if c.is_ascii_digit() => {
|
||||||
|
let (last_digit_idx, _char) = self
|
||||||
|
.input
|
||||||
|
.char_indices()
|
||||||
|
.take_while(|(_idx, c)| c.is_ascii_digit())
|
||||||
|
.last()
|
||||||
|
.expect("matched at least one");
|
||||||
|
// Advance lexer
|
||||||
|
let number = Token::Int(&self.input[..=last_digit_idx]);
|
||||||
|
self.input = &self.input[(last_digit_idx + 1)..];
|
||||||
|
return Ok(number);
|
||||||
|
}
|
||||||
|
c if c.is_whitespace() => {
|
||||||
|
// Skip whitespace
|
||||||
|
let (last_ws_idx, _char) = self
|
||||||
|
.input
|
||||||
|
.char_indices()
|
||||||
|
.take_while(|(_idx, c)| c.is_whitespace())
|
||||||
|
.last()
|
||||||
|
.expect("matched at least one");
|
||||||
|
// Advance lexer
|
||||||
|
self.input = &self.input[(last_ws_idx + 1)..];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
c => return Err(format!("Unknown start of token: '{c}'")),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Advance lexer
|
||||||
|
self.input = &self.input[1..];
|
||||||
|
return Ok(token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'input> Iterator for Lexer<'input> {
|
||||||
|
type Item = Token<'input>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.at_eof {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(self.next_token().expect("Failed to lex input"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Parser<'input> {
|
||||||
|
lexer: Peekable<Lexer<'input>>,
|
||||||
|
builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'input> Parser<'input> {
|
||||||
|
pub fn new(input: &'input str) -> Self {
|
||||||
|
Self {
|
||||||
|
lexer: Lexer::new(input).peekable(),
|
||||||
|
builder: GreenNodeBuilder::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||||
|
self.lexer.next()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(&mut self) -> Result<(), String> {
|
||||||
|
self.builder.start_node(SyntaxKind::Root);
|
||||||
|
self.parse_expr()?;
|
||||||
|
self.builder.finish_node();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_lhs(&mut self) -> Result<(), String> {
|
||||||
|
// An expression may start either with a number, or with an opening parenthesis that is the start of a
|
||||||
|
// parenthesized expression
|
||||||
|
let next_token = *self.lexer.peek().unwrap();
|
||||||
|
match next_token {
|
||||||
|
Token::Int(n) => {
|
||||||
|
self.bump();
|
||||||
|
self.builder.token(SyntaxKind::Int, n);
|
||||||
|
}
|
||||||
|
Token::LParen => {
|
||||||
|
// Wrap the grouped expression inside a node containing it and its parentheses
|
||||||
|
self.builder.start_node(SyntaxKind::Expr);
|
||||||
|
self.bump();
|
||||||
|
self.builder.static_token(SyntaxKind::LParen);
|
||||||
|
self.parse_expr()?; // Inner expression
|
||||||
|
if self.bump() != Some(Token::RParen) {
|
||||||
|
return Err("Missing ')'".to_string());
|
||||||
|
}
|
||||||
|
self.builder.static_token(SyntaxKind::RParen);
|
||||||
|
self.builder.finish_node();
|
||||||
|
}
|
||||||
|
Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||||
|
t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_expr(&mut self) -> Result<(), String> {
|
||||||
|
// Remember our current position
|
||||||
|
let before_expr = self.builder.checkpoint();
|
||||||
|
|
||||||
|
// Parse the start of the expression
|
||||||
|
self.parse_lhs()?;
|
||||||
|
|
||||||
|
// Check if the expression continues with `+ <more>` or `- <more>`
|
||||||
|
let Some(next_token) = self.lexer.peek() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let op = match *next_token {
|
||||||
|
Token::Plus => SyntaxKind::Plus,
|
||||||
|
Token::Minus => SyntaxKind::Minus,
|
||||||
|
Token::RParen | Token::EoF => return Ok(()),
|
||||||
|
t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||||
|
};
|
||||||
|
|
||||||
|
// If so, retroactively wrap the (already parsed) LHS and the following RHS inside an `Expr` node
|
||||||
|
self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||||
|
self.bump();
|
||||||
|
self.builder.static_token(op);
|
||||||
|
self.parse_expr()?; // RHS
|
||||||
|
self.builder.finish_node();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||||
|
assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||||
|
let (tree, cache) = self.builder.finish();
|
||||||
|
(tree, cache.unwrap().into_interner().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
let mut buf = String::new();
|
||||||
|
loop {
|
||||||
|
print!("Enter expression: ");
|
||||||
|
io::stdout().flush().unwrap();
|
||||||
|
buf.clear();
|
||||||
|
if let Err(e) = io::stdin().read_line(&mut buf) {
|
||||||
|
eprintln!("Error reading input: {e}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let mut parser = Parser::new(&buf);
|
||||||
|
if let Err(e) = parser.parse() {
|
||||||
|
eprintln!("Parse error: {e}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (tree, interner) = parser.finish();
|
||||||
|
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||||
|
|
||||||
|
if let Some(expr) = root.first_child_or_token() {
|
||||||
|
let result = eval_elem(expr, &mut root.children_with_tokens());
|
||||||
|
println!("Result: {result}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eval(expr: &ResolvedNode<Calculator>) -> i64 {
|
||||||
|
let mut children = expr.children_with_tokens();
|
||||||
|
let lhs = eval_elem(children.next().expect("empty expr"), &mut children);
|
||||||
|
let Some(op) = children.next().map(|elem| elem.kind()) else {
|
||||||
|
// Literal expression
|
||||||
|
return lhs;
|
||||||
|
};
|
||||||
|
let rhs = eval_elem(children.next().expect("missing RHS"), &mut children);
|
||||||
|
|
||||||
|
match op {
|
||||||
|
SyntaxKind::Plus => lhs + rhs,
|
||||||
|
SyntaxKind::Minus => lhs - rhs,
|
||||||
|
_ => unreachable!("invalid op"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eval_elem<'e>(
|
||||||
|
expr: ResolvedElementRef<'_, Calculator>,
|
||||||
|
children: &mut impl Iterator<Item = ResolvedElementRef<'e, Calculator>>,
|
||||||
|
) -> i64 {
|
||||||
|
use cstree::util::NodeOrToken;
|
||||||
|
|
||||||
|
match expr {
|
||||||
|
NodeOrToken::Node(n) => {
|
||||||
|
assert_eq!(n.kind(), SyntaxKind::Expr);
|
||||||
|
eval(n)
|
||||||
|
}
|
||||||
|
NodeOrToken::Token(t) => match t.kind() {
|
||||||
|
SyntaxKind::Int => {
|
||||||
|
let number_str = t.text();
|
||||||
|
number_str.parse().expect("parsed int could not be evaluated")
|
||||||
|
}
|
||||||
|
SyntaxKind::LParen => {
|
||||||
|
let inner = children.next().expect("missing content inside parens");
|
||||||
|
// It's important that we consume the `)` here, as otherwise `eval` might mistake it for an operator
|
||||||
|
assert_eq!(
|
||||||
|
children
|
||||||
|
.next()
|
||||||
|
.and_then(|elem| elem.into_token())
|
||||||
|
.map(|token| token.kind()),
|
||||||
|
Some(SyntaxKind::RParen)
|
||||||
|
);
|
||||||
|
eval_elem(inner, children)
|
||||||
|
}
|
||||||
|
_ => unreachable!("invalid start of expression"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn lex() {
|
||||||
|
let input = "11 + 2-(5 + 4)";
|
||||||
|
let lexer = Lexer::new(input);
|
||||||
|
let tokens: Vec<_> = lexer.into_iter().collect();
|
||||||
|
assert_eq!(
|
||||||
|
tokens,
|
||||||
|
vec![
|
||||||
|
Token::Int("11"),
|
||||||
|
Token::Plus,
|
||||||
|
Token::Int("2"),
|
||||||
|
Token::Minus,
|
||||||
|
Token::LParen,
|
||||||
|
Token::Int("5"),
|
||||||
|
Token::Plus,
|
||||||
|
Token::Int("4"),
|
||||||
|
Token::RParen,
|
||||||
|
Token::EoF
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse() {
|
||||||
|
let input = "11 + 2-(5 + 4)";
|
||||||
|
let mut parser = Parser::new(input);
|
||||||
|
parser.parse().unwrap();
|
||||||
|
let (tree, interner) = parser.finish();
|
||||||
|
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||||
|
dbg!(root);
|
||||||
|
}
|
||||||
|
}
|
|
@ -30,7 +30,7 @@ use SyntaxKind::*;
|
||||||
/// in order to not need the user's `enum SyntaxKind` as a type parameter.
|
/// in order to not need the user's `enum SyntaxKind` as a type parameter.
|
||||||
///
|
///
|
||||||
/// First, to easily pass the enum variants into cstree via `.into()`:
|
/// First, to easily pass the enum variants into cstree via `.into()`:
|
||||||
impl From<SyntaxKind> for cstree::SyntaxKind {
|
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||||
fn from(kind: SyntaxKind) -> Self {
|
fn from(kind: SyntaxKind) -> Self {
|
||||||
Self(kind as u16)
|
Self(kind as u16)
|
||||||
}
|
}
|
||||||
|
@ -44,12 +44,12 @@ pub enum Lang {}
|
||||||
impl cstree::Language for Lang {
|
impl cstree::Language for Lang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= Root as u16);
|
assert!(raw.0 <= Root as u16);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
kind.into()
|
kind.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,14 +66,11 @@ impl cstree::Language for Lang {
|
||||||
/// offsets and parent pointers.
|
/// offsets and parent pointers.
|
||||||
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
||||||
/// the Resolver to get the real text back from the interned representation.
|
/// the Resolver to get the real text back from the interned representation.
|
||||||
use cstree::{
|
use cstree::{green::GreenNode, interning::Resolver, Language};
|
||||||
interning::{IntoResolver, Resolver},
|
|
||||||
GreenNode, Language,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
||||||
/// a stack of currently in-progress nodes.
|
/// a stack of currently in-progress nodes.
|
||||||
use cstree::GreenNodeBuilder;
|
use cstree::build::GreenNodeBuilder;
|
||||||
|
|
||||||
/// The parse results are stored as a "green tree".
|
/// The parse results are stored as a "green tree".
|
||||||
/// We'll discuss how to work with the results later.
|
/// We'll discuss how to work with the results later.
|
||||||
|
@ -135,7 +132,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
|
||||||
let (tree, cache) = self.builder.finish();
|
let (tree, cache) = self.builder.finish();
|
||||||
Parse {
|
Parse {
|
||||||
green_node: tree,
|
green_node: tree,
|
||||||
resolver: cache.unwrap().into_interner().unwrap().into_resolver(),
|
resolver: cache.unwrap().into_interner().unwrap(),
|
||||||
errors: self.errors,
|
errors: self.errors,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -213,11 +210,11 @@ fn parse(text: &str) -> Parse<impl Resolver> {
|
||||||
/// To work with the parse results we need a view into the green tree - the syntax tree.
|
/// To work with the parse results we need a view into the green tree - the syntax tree.
|
||||||
/// It is also immutable, like a GreenNode, but it contains parent pointers, offsets, and has
|
/// It is also immutable, like a GreenNode, but it contains parent pointers, offsets, and has
|
||||||
/// identity semantics.
|
/// identity semantics.
|
||||||
type SyntaxNode = cstree::SyntaxNode<Lang>;
|
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
type SyntaxToken = cstree::SyntaxToken<Lang>;
|
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
type SyntaxElement = cstree::SyntaxElement<Lang>;
|
type SyntaxElement = cstree::syntax::SyntaxElement<Lang>;
|
||||||
|
|
||||||
impl<I> Parse<I> {
|
impl<I> Parse<I> {
|
||||||
fn syntax(&self) -> SyntaxNode {
|
fn syntax(&self) -> SyntaxNode {
|
||||||
|
@ -355,8 +352,10 @@ impl ast::Atom {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
|
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
|
||||||
match &self.0.green().children().next() {
|
use cstree::util::NodeOrToken;
|
||||||
Some(cstree::NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
|
|
||||||
|
match self.0.green().children().next() {
|
||||||
|
Some(NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
|
||||||
.or_else(|| token.text(resolver))
|
.or_else(|| token.text(resolver))
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
|
@ -422,7 +421,7 @@ nan
|
||||||
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
||||||
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
||||||
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
||||||
m_lexer::TokenKind(cstree::SyntaxKind::from(t).0)
|
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
|
||||||
}
|
}
|
||||||
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
||||||
match t.0 {
|
match t.0 {
|
||||||
|
|
50
examples/salsa.rs
Normal file
50
examples/salsa.rs
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#![cfg(feature = "salsa_2022_compat")]
|
||||||
|
|
||||||
|
use cstree::{build::GreenNodeBuilder, impl_cstree_interning_for_salsa};
|
||||||
|
|
||||||
|
#[salsa::jar(db = Db)]
|
||||||
|
pub struct Jar(crate::SourceId);
|
||||||
|
|
||||||
|
pub trait Db: salsa::DbWithJar<Jar> {}
|
||||||
|
impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
||||||
|
|
||||||
|
#[salsa::interned]
|
||||||
|
pub struct SourceId {
|
||||||
|
#[return_ref]
|
||||||
|
pub text: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
#[salsa::db(crate::Jar)]
|
||||||
|
struct Database {
|
||||||
|
storage: salsa::Storage<Self>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl salsa::Database for Database {}
|
||||||
|
|
||||||
|
impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
|
||||||
|
|
||||||
|
use cstree::{syntax::SyntaxNode, testing::*};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let db = Database::default();
|
||||||
|
let interned = SourceId::new(&db, "foo".to_string());
|
||||||
|
let original = interned.text(&db);
|
||||||
|
assert_eq!(original, "foo");
|
||||||
|
|
||||||
|
let interner = db.as_interner();
|
||||||
|
let mut shared_interner = &interner;
|
||||||
|
let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
|
||||||
|
let (tree, _no_interner_because_it_was_borrowed) = {
|
||||||
|
builder.start_node(TestSyntaxKind::Plus);
|
||||||
|
builder.token(TestSyntaxKind::Float, "2.05");
|
||||||
|
builder.token(TestSyntaxKind::Whitespace, " ");
|
||||||
|
builder.token(TestSyntaxKind::Plus, "+");
|
||||||
|
builder.token(TestSyntaxKind::Whitespace, " ");
|
||||||
|
builder.token(TestSyntaxKind::Float, "7.32");
|
||||||
|
builder.finish_node();
|
||||||
|
builder.finish()
|
||||||
|
};
|
||||||
|
let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
|
||||||
|
assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
|
||||||
|
}
|
19
src/green.rs
19
src/green.rs
|
@ -1,10 +1,9 @@
|
||||||
//! Implementation of the inner, "green" tree.
|
//! Implementation of the inner, "green" tree.
|
||||||
//! The [`GreenNodeBuilder`] is the main entry point to constructing [`GreenNode`]s and
|
//! The [`GreenNodeBuilder`](crate::build::GreenNodeBuilder) from the [`build` module](crate::build) is the main entry
|
||||||
//! [`GreenToken`]s.
|
//! point to constructing [`GreenNode`]s and [`GreenToken`]s.
|
||||||
|
|
||||||
mod builder;
|
pub(super) mod builder;
|
||||||
mod element;
|
mod element;
|
||||||
mod interner;
|
|
||||||
mod iter;
|
mod iter;
|
||||||
mod node;
|
mod node;
|
||||||
mod token;
|
mod token;
|
||||||
|
@ -12,17 +11,7 @@ mod token;
|
||||||
pub(crate) use self::element::GreenElementRef;
|
pub(crate) use self::element::GreenElementRef;
|
||||||
use self::element::{GreenElement, PackedGreenElement};
|
use self::element::{GreenElement, PackedGreenElement};
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{iter::GreenNodeChildren, node::GreenNode, token::GreenToken};
|
||||||
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
|
|
||||||
interner::TokenInterner,
|
|
||||||
iter::GreenNodeChildren,
|
|
||||||
node::GreenNode,
|
|
||||||
token::GreenToken,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// SyntaxKind is a type tag for each token or node.
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
||||||
pub struct SyntaxKind(pub u16);
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
|
@ -4,10 +4,11 @@ use fxhash::{FxHashMap, FxHasher32};
|
||||||
use text_size::TextSize;
|
use text_size::TextSize;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
|
green::{GreenElement, GreenNode, GreenToken},
|
||||||
interning::{Interner, Key},
|
interning::{new_interner, Interner, TokenInterner, TokenKey},
|
||||||
|
util::NodeOrToken,
|
||||||
utility_types::MaybeOwned,
|
utility_types::MaybeOwned,
|
||||||
Language, NodeOrToken,
|
Language, RawSyntaxKind,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{node::GreenNodeHead, token::GreenTokenData};
|
use super::{node::GreenNodeHead, token::GreenTokenData};
|
||||||
|
@ -35,6 +36,8 @@ impl NodeCache<'static> {
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::{*, Language as _};
|
/// # use cstree::testing::{*, Language as _};
|
||||||
|
/// use cstree::build::NodeCache;
|
||||||
|
///
|
||||||
/// // Build a tree
|
/// // Build a tree
|
||||||
/// let mut cache = NodeCache::new();
|
/// let mut cache = NodeCache::new();
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::with_cache(&mut cache);
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::with_cache(&mut cache);
|
||||||
|
@ -53,7 +56,7 @@ impl NodeCache<'static> {
|
||||||
Self {
|
Self {
|
||||||
nodes: FxHashMap::default(),
|
nodes: FxHashMap::default(),
|
||||||
tokens: FxHashMap::default(),
|
tokens: FxHashMap::default(),
|
||||||
interner: MaybeOwned::Owned(TokenInterner::new()),
|
interner: MaybeOwned::Owned(new_interner()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,19 +69,21 @@ impl Default for NodeCache<'static> {
|
||||||
|
|
||||||
impl<'i, I> NodeCache<'i, I>
|
impl<'i, I> NodeCache<'i, I>
|
||||||
where
|
where
|
||||||
I: Interner,
|
I: Interner<TokenKey>,
|
||||||
{
|
{
|
||||||
/// Constructs a new, empty cache that will use the given interner to deduplicate source text
|
/// Constructs a new, empty cache that will use the given interner to deduplicate source text
|
||||||
/// (strings) across tokens.
|
/// (strings) across tokens.
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::{*, Language as _};
|
/// # use cstree::testing::{*, Language as _};
|
||||||
/// use lasso::Rodeo;
|
/// # use cstree::interning::*;
|
||||||
|
/// use cstree::build::NodeCache;
|
||||||
///
|
///
|
||||||
/// // Create the builder from a custom `Rodeo`
|
/// // Create the builder from a custom interner
|
||||||
/// let mut interner = Rodeo::new();
|
/// let mut interner = new_interner();
|
||||||
/// let mut cache = NodeCache::with_interner(&mut interner);
|
/// let mut cache = NodeCache::with_interner(&mut interner);
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::with_cache(&mut cache);
|
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||||
|
/// GreenNodeBuilder::with_cache(&mut cache);
|
||||||
///
|
///
|
||||||
/// // Construct the tree
|
/// // Construct the tree
|
||||||
/// # builder.start_node(Root);
|
/// # builder.start_node(Root);
|
||||||
|
@ -107,12 +112,14 @@ where
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::{*, Language as _};
|
/// # use cstree::testing::{*, Language as _};
|
||||||
/// use lasso::Rodeo;
|
/// # use cstree::interning::*;
|
||||||
|
/// use cstree::build::NodeCache;
|
||||||
///
|
///
|
||||||
/// // Create the builder from a custom `Rodeo`
|
/// // Create the builder from a custom interner
|
||||||
/// let mut interner = Rodeo::new();
|
/// let mut interner = new_interner();
|
||||||
/// let cache = NodeCache::from_interner(interner);
|
/// let cache = NodeCache::from_interner(interner);
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::from_cache(cache);
|
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||||
|
/// GreenNodeBuilder::from_cache(cache);
|
||||||
///
|
///
|
||||||
/// // Construct the tree
|
/// // Construct the tree
|
||||||
/// # builder.start_node(Root);
|
/// # builder.start_node(Root);
|
||||||
|
@ -142,22 +149,23 @@ where
|
||||||
/// See also [`interner_mut`](NodeCache::interner_mut).
|
/// See also [`interner_mut`](NodeCache::interner_mut).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn interner(&self) -> &I {
|
pub fn interner(&self) -> &I {
|
||||||
&*self.interner
|
&self.interner
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::*;
|
/// # use cstree::*;
|
||||||
|
/// # use cstree::build::*;
|
||||||
/// # use cstree::interning::*;
|
/// # use cstree::interning::*;
|
||||||
/// let mut cache = NodeCache::new();
|
/// let mut cache = NodeCache::new();
|
||||||
/// let interner = cache.interner_mut();
|
/// let interner = cache.interner_mut();
|
||||||
/// let key = interner.get_or_intern("foo");
|
/// let key = interner.get_or_intern("foo");
|
||||||
/// assert_eq!(interner.resolve(&key), "foo");
|
/// assert_eq!(interner.resolve(key), "foo");
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn interner_mut(&mut self) -> &mut I {
|
pub fn interner_mut(&mut self) -> &mut I {
|
||||||
&mut *self.interner
|
&mut self.interner
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If this node cache was constructed with [`new`](NodeCache::new) or
|
/// If this node cache was constructed with [`new`](NodeCache::new) or
|
||||||
|
@ -196,7 +204,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn intern(&mut self, text: &str) -> Key {
|
fn intern(&mut self, text: &str) -> TokenKey {
|
||||||
self.interner.get_or_intern(text)
|
self.interner.get_or_intern(text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,7 +213,7 @@ where
|
||||||
#[inline]
|
#[inline]
|
||||||
fn get_cached_node(
|
fn get_cached_node(
|
||||||
&mut self,
|
&mut self,
|
||||||
kind: SyntaxKind,
|
kind: RawSyntaxKind,
|
||||||
children: std::vec::Drain<'_, GreenElement>,
|
children: std::vec::Drain<'_, GreenElement>,
|
||||||
text_len: TextSize,
|
text_len: TextSize,
|
||||||
child_hash: u32,
|
child_hash: u32,
|
||||||
|
@ -221,7 +229,7 @@ where
|
||||||
.clone()
|
.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<Key>, len: u32) -> GreenToken {
|
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<TokenKey>, len: u32) -> GreenToken {
|
||||||
let text_len = TextSize::from(len);
|
let text_len = TextSize::from(len);
|
||||||
let kind = L::kind_to_raw(kind);
|
let kind = L::kind_to_raw(kind);
|
||||||
let data = GreenTokenData { kind, text, text_len };
|
let data = GreenTokenData { kind, text, text_len };
|
||||||
|
@ -246,7 +254,6 @@ pub struct Checkpoint(usize);
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::{*, Language as _};
|
/// # use cstree::testing::{*, Language as _};
|
||||||
/// # use cstree::interning::IntoResolver;
|
|
||||||
/// // Build a tree
|
/// // Build a tree
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||||
/// builder.start_node(Root);
|
/// builder.start_node(Root);
|
||||||
|
@ -258,7 +265,7 @@ pub struct Checkpoint(usize);
|
||||||
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
|
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
|
||||||
/// let int = tree.children().next().unwrap();
|
/// let int = tree.children().next().unwrap();
|
||||||
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
|
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
|
||||||
/// let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
|
/// let resolver = cache.unwrap().into_interner().unwrap();
|
||||||
/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42"));
|
/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42"));
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -288,7 +295,7 @@ impl<L: Language> Default for GreenNodeBuilder<'static, 'static, L> {
|
||||||
impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I>
|
impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I>
|
||||||
where
|
where
|
||||||
L: Language,
|
L: Language,
|
||||||
I: Interner,
|
I: Interner<TokenKey>,
|
||||||
{
|
{
|
||||||
/// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally
|
/// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally
|
||||||
/// share underlying trees.
|
/// share underlying trees.
|
||||||
|
@ -306,6 +313,7 @@ where
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::{*, Language as _};
|
/// # use cstree::testing::{*, Language as _};
|
||||||
|
/// # use cstree::build::*;
|
||||||
/// // Construct a builder from our own cache
|
/// // Construct a builder from our own cache
|
||||||
/// let cache = NodeCache::new();
|
/// let cache = NodeCache::new();
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::from_cache(cache);
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::from_cache(cache);
|
||||||
|
@ -358,7 +366,7 @@ where
|
||||||
/// See also [`interner_mut`](GreenNodeBuilder::interner_mut).
|
/// See also [`interner_mut`](GreenNodeBuilder::interner_mut).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn interner(&self) -> &I {
|
pub fn interner(&self) -> &I {
|
||||||
&*self.cache.interner
|
&self.cache.interner
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
||||||
|
@ -367,20 +375,19 @@ where
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
|
/// # use cstree::build::*;
|
||||||
/// # use cstree::interning::*;
|
/// # use cstree::interning::*;
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||||
/// let interner = builder.interner_mut();
|
/// let interner = builder.interner_mut();
|
||||||
/// let key = interner.get_or_intern("foo");
|
/// let key = interner.get_or_intern("foo");
|
||||||
/// assert_eq!(interner.resolve(&key), "foo");
|
/// assert_eq!(interner.resolve(key), "foo");
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn interner_mut(&mut self) -> &mut I {
|
pub fn interner_mut(&mut self) -> &mut I {
|
||||||
&mut *self.cache.interner
|
&mut self.cache.interner
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a new token to the current branch without storing an explicit section of text.
|
/// Add a new token with the given `text` to the current node.
|
||||||
/// This is be useful if the text can always be inferred from the token's `kind`, for example
|
|
||||||
/// when using kinds for specific operators or punctuation.
|
|
||||||
///
|
///
|
||||||
/// ## Panics
|
/// ## Panics
|
||||||
/// In debug mode, if `kind` has static text, this function will verify that `text` matches that text.
|
/// In debug mode, if `kind` has static text, this function will verify that `text` matches that text.
|
||||||
|
@ -403,6 +410,22 @@ where
|
||||||
self.children.push(token.into());
|
self.children.push(token.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a new token to the current node without storing an explicit section of text.
|
||||||
|
/// This is be useful if the text can always be inferred from the token's `kind`, for example
|
||||||
|
/// when using kinds for specific operators or punctuation.
|
||||||
|
///
|
||||||
|
/// For tokens whose textual representation is not static, such as numbers or identifiers, use
|
||||||
|
/// [`token`](GreenNodeBuilder::token).
|
||||||
|
///
|
||||||
|
/// ## Panics
|
||||||
|
/// If `kind` does not have static text, i.e., `L::static_text(kind)` returns `None`.
|
||||||
|
#[inline]
|
||||||
|
pub fn static_token(&mut self, kind: L::Kind) {
|
||||||
|
let static_text = L::static_text(kind).unwrap_or_else(|| panic!("Missing static text for '{kind:?}'"));
|
||||||
|
let token = self.cache.token::<L>(kind, None, static_text.len() as u32);
|
||||||
|
self.children.push(token.into());
|
||||||
|
}
|
||||||
|
|
||||||
/// Start new node of the given `kind` and make it current.
|
/// Start new node of the given `kind` and make it current.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn start_node(&mut self, kind: L::Kind) {
|
pub fn start_node(&mut self, kind: L::Kind) {
|
||||||
|
@ -427,7 +450,7 @@ where
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
/// # use cstree::{GreenNodeBuilder, Language};
|
/// # use cstree::{build::GreenNodeBuilder, Language};
|
||||||
/// # struct Parser;
|
/// # struct Parser;
|
||||||
/// # impl Parser {
|
/// # impl Parser {
|
||||||
/// # fn peek(&self) -> Option<TestSyntaxKind> { None }
|
/// # fn peek(&self) -> Option<TestSyntaxKind> { None }
|
||||||
|
|
|
@ -7,8 +7,10 @@ type ErasedPtr = *const u8;
|
||||||
use sptr::Strict;
|
use sptr::Strict;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
green::{GreenNode, GreenToken, SyntaxKind},
|
green::{GreenNode, GreenToken},
|
||||||
NodeOrToken, TextSize,
|
text::TextSize,
|
||||||
|
util::NodeOrToken,
|
||||||
|
RawSyntaxKind,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
|
pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
|
||||||
|
@ -64,7 +66,7 @@ impl From<GreenToken> for PackedGreenElement {
|
||||||
impl GreenElement {
|
impl GreenElement {
|
||||||
/// Returns kind of this element.
|
/// Returns kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn kind(&self) -> SyntaxKind {
|
pub fn kind(&self) -> RawSyntaxKind {
|
||||||
self.as_ref().kind()
|
self.as_ref().kind()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,7 +80,7 @@ impl GreenElement {
|
||||||
impl GreenElementRef<'_> {
|
impl GreenElementRef<'_> {
|
||||||
/// Returns kind of this element.
|
/// Returns kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn kind(&self) -> SyntaxKind {
|
pub fn kind(&self) -> RawSyntaxKind {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.kind(),
|
NodeOrToken::Node(it) => it.kind(),
|
||||||
NodeOrToken::Token(it) => it.kind(),
|
NodeOrToken::Token(it) => it.kind(),
|
||||||
|
|
|
@ -1,126 +0,0 @@
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
|
|
||||||
use crate::interning::{
|
|
||||||
Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Key, Reader, Resolver, Rodeo,
|
|
||||||
};
|
|
||||||
use fxhash::FxBuildHasher;
|
|
||||||
|
|
||||||
/// The default [`Interner`] used to deduplicate green token strings.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct TokenInterner {
|
|
||||||
rodeo: Rodeo,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TokenInterner {
|
|
||||||
pub(super) fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
rodeo: Rodeo::with_capacity_and_hasher(
|
|
||||||
// capacity values suggested by author of `lasso`
|
|
||||||
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
|
||||||
FxBuildHasher::default(),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Resolver for TokenInterner {
|
|
||||||
#[inline]
|
|
||||||
fn resolve<'a>(&'a self, key: &Key) -> &'a str {
|
|
||||||
self.rodeo.resolve(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn try_resolve<'a>(&'a self, key: &Key) -> Option<&'a str> {
|
|
||||||
self.rodeo.try_resolve(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
unsafe fn resolve_unchecked<'a>(&'a self, key: &Key) -> &'a str {
|
|
||||||
self.rodeo.resolve_unchecked(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn contains_key(&self, key: &Key) -> bool {
|
|
||||||
self.rodeo.contains_key(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn len(&self) -> usize {
|
|
||||||
self.rodeo.len()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Reader for TokenInterner {
|
|
||||||
#[inline]
|
|
||||||
fn get(&self, val: &str) -> Option<Key> {
|
|
||||||
self.rodeo.get(val)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn contains(&self, val: &str) -> bool {
|
|
||||||
self.rodeo.contains(val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IntoResolver for TokenInterner {
|
|
||||||
type Resolver = <Rodeo as IntoResolver>::Resolver;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn into_resolver(self) -> Self::Resolver
|
|
||||||
where
|
|
||||||
Self: 'static,
|
|
||||||
{
|
|
||||||
self.rodeo.into_resolver()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
|
|
||||||
where
|
|
||||||
Self: 'static,
|
|
||||||
{
|
|
||||||
Rodeo::into_resolver_boxed(Box::new(self.rodeo))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Interner for TokenInterner {
|
|
||||||
#[inline]
|
|
||||||
fn get_or_intern(&mut self, val: &str) -> Key {
|
|
||||||
self.rodeo.get_or_intern(val)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Key> {
|
|
||||||
self.rodeo.try_get_or_intern(val)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn get_or_intern_static(&mut self, val: &'static str) -> Key {
|
|
||||||
self.rodeo.get_or_intern_static(val)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Key> {
|
|
||||||
self.rodeo.try_get_or_intern_static(val)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IntoReader for TokenInterner {
|
|
||||||
type Reader = <Rodeo as IntoReader>::Reader;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn into_reader(self) -> Self::Reader
|
|
||||||
where
|
|
||||||
Self: 'static,
|
|
||||||
{
|
|
||||||
self.rodeo.into_reader()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_reader_boxed(self: Box<Self>) -> Self::Reader
|
|
||||||
where
|
|
||||||
Self: 'static,
|
|
||||||
{
|
|
||||||
Rodeo::into_reader_boxed(Box::new(self.rodeo))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IntoReaderAndResolver for TokenInterner {}
|
|
|
@ -4,7 +4,7 @@ use std::{iter::FusedIterator, slice};
|
||||||
|
|
||||||
use super::{element::PackedGreenElement, GreenElementRef};
|
use super::{element::PackedGreenElement, GreenElementRef};
|
||||||
|
|
||||||
/// An iterator over a [`GreenNode`](crate::GreenNode)'s children.
|
/// An iterator over a [`GreenNode`](crate::green::GreenNode)'s children.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct GreenNodeChildren<'a> {
|
pub struct GreenNodeChildren<'a> {
|
||||||
pub(super) inner: slice::Iter<'a, PackedGreenElement>,
|
pub(super) inner: slice::Iter<'a, PackedGreenElement>,
|
||||||
|
|
|
@ -6,15 +6,16 @@ use std::{
|
||||||
use fxhash::FxHasher32;
|
use fxhash::FxHasher32;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement, SyntaxKind},
|
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement},
|
||||||
TextSize,
|
text::TextSize,
|
||||||
|
RawSyntaxKind,
|
||||||
};
|
};
|
||||||
use triomphe::{Arc, HeaderWithLength, ThinArc};
|
use triomphe::{Arc, HeaderWithLength, ThinArc};
|
||||||
|
|
||||||
#[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
#[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub(super) struct GreenNodeHead {
|
pub(super) struct GreenNodeHead {
|
||||||
pub(super) kind: SyntaxKind,
|
pub(super) kind: RawSyntaxKind,
|
||||||
pub(super) text_len: TextSize,
|
pub(super) text_len: TextSize,
|
||||||
pub(super) child_hash: u32,
|
pub(super) child_hash: u32,
|
||||||
}
|
}
|
||||||
|
@ -35,7 +36,7 @@ impl std::fmt::Debug for GreenNode {
|
||||||
impl GreenNode {
|
impl GreenNode {
|
||||||
/// Creates a new Node.
|
/// Creates a new Node.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode
|
pub fn new<I>(kind: RawSyntaxKind, children: I) -> GreenNode
|
||||||
where
|
where
|
||||||
I: IntoIterator<Item = GreenElement>,
|
I: IntoIterator<Item = GreenElement>,
|
||||||
I::IntoIter: ExactSizeIterator,
|
I::IntoIter: ExactSizeIterator,
|
||||||
|
@ -72,7 +73,7 @@ impl GreenNode {
|
||||||
/// Creates a new Node.
|
/// Creates a new Node.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(super) fn new_with_len_and_hash<I>(
|
pub(super) fn new_with_len_and_hash<I>(
|
||||||
kind: SyntaxKind,
|
kind: RawSyntaxKind,
|
||||||
children: I,
|
children: I,
|
||||||
text_len: TextSize,
|
text_len: TextSize,
|
||||||
child_hash: u32,
|
child_hash: u32,
|
||||||
|
@ -115,9 +116,9 @@ impl GreenNode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [`SyntaxKind`] of this node.
|
/// [`RawSyntaxKind`] of this node.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn kind(&self) -> SyntaxKind {
|
pub fn kind(&self) -> RawSyntaxKind {
|
||||||
self.data.header.header.kind
|
self.data.header.header.kind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull};
|
use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
green::SyntaxKind,
|
interning::{Resolver, TokenKey},
|
||||||
interning::{Key, Resolver},
|
text::TextSize,
|
||||||
TextSize,
|
RawSyntaxKind,
|
||||||
};
|
};
|
||||||
use sptr::Strict;
|
use sptr::Strict;
|
||||||
use triomphe::Arc;
|
use triomphe::Arc;
|
||||||
|
@ -11,8 +11,8 @@ use triomphe::Arc;
|
||||||
#[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
#[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
||||||
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
|
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
|
||||||
pub(super) struct GreenTokenData {
|
pub(super) struct GreenTokenData {
|
||||||
pub(super) kind: SyntaxKind,
|
pub(super) kind: RawSyntaxKind,
|
||||||
pub(super) text: Option<Key>,
|
pub(super) text: Option<TokenKey>,
|
||||||
pub(super) text_len: TextSize,
|
pub(super) text_len: TextSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,9 +54,9 @@ impl GreenToken {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [`SyntaxKind`] of this Token.
|
/// [`RawSyntaxKind`] of this Token.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn kind(&self) -> SyntaxKind {
|
pub fn kind(&self) -> RawSyntaxKind {
|
||||||
self.data().kind
|
self.data().kind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,9 +64,9 @@ impl GreenToken {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str>
|
pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str>
|
||||||
where
|
where
|
||||||
I: Resolver + ?Sized,
|
I: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
self.data().text.map(|key| resolver.resolve(&key))
|
self.data().text.map(|key| resolver.resolve(key))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the length of text covered by this token.
|
/// Returns the length of text covered by this token.
|
||||||
|
@ -80,7 +80,7 @@ impl GreenToken {
|
||||||
///
|
///
|
||||||
/// See also [`text`](GreenToken::text).
|
/// See also [`text`](GreenToken::text).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn text_key(&self) -> Option<Key> {
|
pub fn text_key(&self) -> Option<TokenKey> {
|
||||||
self.data().text
|
self.data().text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
205
src/interning.rs
205
src/interning.rs
|
@ -1,47 +1,186 @@
|
||||||
//! Types and Traits for efficient String storage and deduplication.
|
//! Types and Traits for efficient String storage and deduplication.
|
||||||
//!
|
//!
|
||||||
//! Interning functionality is provided by the [`lasso`](lasso) crate.
|
//! Because `cstree` is aimed at _concrete_ syntax trees that faithfully represent all of the original program input,
|
||||||
|
//! `cstree` aks for the text of each token when building a syntax tree. You'll notice this when looking at
|
||||||
|
//! [`GreenNodeBuilder::token`], which takes the kind of token and a refernce to the text of the token in the source.
|
||||||
|
//!
|
||||||
|
//! Of course, there are tokens whose text will always be the same, such as punctuation (like a semicolon), keywords
|
||||||
|
//! (like `fn`), or operators (like `<=`). Use [`Language::static_text`] when implementing `Language` to make `cstree`
|
||||||
|
//! aware of such tokens.
|
||||||
|
//!
|
||||||
|
//! There is, however, another category of tokens whose text will appear repeatedly, but for which we cannot know the
|
||||||
|
//! text upfront. Any variable, type, or method that is user-defined will likely be named more than once, but there is
|
||||||
|
//! no way to know beforehand what names a user will choose.
|
||||||
|
//!
|
||||||
|
//! In order to avoid storing the source text for these tokens many times over, `cstree` _interns_ the text of its
|
||||||
|
//! tokens (if that text is not static). What this means is that each unique string is only stored once. When a new
|
||||||
|
//! token is added - say, a variable -, we check if we already know its contents (the variable name). If the text is
|
||||||
|
//! new, we save it and give it a unique Id. If we have seen the text before, we look up its unique Id and don't need to
|
||||||
|
//! keep the new data around. As an additional benefit, interning also makes it much cheaper to copy source text around
|
||||||
|
//! and also to compare it with other source text, since what is actually being copied or compared is just an integer.
|
||||||
|
//!
|
||||||
|
//! ## I just want to build a syntax tree
|
||||||
|
//!
|
||||||
|
//! If you don't want to worry about this for now, you (mostly) can! All required functionality is implemented in
|
||||||
|
//! `cstree` and you can just use [`GreenNodeBuilder::new`] to obtain a tree builder with everything set up (see the
|
||||||
|
//! [crate documentation] for more on how to get started). This will create an interner, which the builder returns
|
||||||
|
//! together with the syntax tree on [`finish`] as part of its node cache (call [`NodeCache::into_interner`] on the
|
||||||
|
//! result to get the interner out).
|
||||||
|
//!
|
||||||
|
//! Here begins the part where you do have to think about interning: `cstree` needs the interner you get when you want
|
||||||
|
//! to look at the source text for some part of the syntax tree, so you'll have to keep it around somehow until the
|
||||||
|
//! point where you need it.
|
||||||
|
//!
|
||||||
|
//! How best to do this depends on what you need the text for. If the code that accesses the text is close-by, it might
|
||||||
|
//! be enough to pass the return value to the functions that need it (within `cstree` or in your code). Other options
|
||||||
|
//! could be to store the interner together with the syntax tree. If you use [`SyntaxNode::new_root_with_resolver`], you
|
||||||
|
//! get a syntax tree that can handle text without any need to manage and pass an interner (the reason the method is
|
||||||
|
//! called `_with_resolver` and not `_with_interner` is that it doesn't actually needs a full [`Interner`] -- once the
|
||||||
|
//! tree is created, no more text will be added, so it just needs to be able to look up text. This part is called a
|
||||||
|
//! [`Resolver`]). Or you could put the interner somewhere "global", where you can easily access it from anywhere.
|
||||||
|
//!
|
||||||
|
//! ## Using other interners
|
||||||
|
//!
|
||||||
|
//! By default, `cstree` uses its own, simple interner implementation. You can obtain an interner by calling
|
||||||
|
//! [`new_interner`], or bring your own by implementing the [`Resolver`] and [`Interner`] traits defined in this module.
|
||||||
|
//! Most methods in `cstree` require that you support interning [`TokenKey`]s. `TokenKey` implements [`InternKey`], so
|
||||||
|
//! your implementation can use that to convert to whatever types it uses for its internal representation. Note that
|
||||||
|
//! there is no way to change the size of the internal representation.
|
||||||
|
//!
|
||||||
|
//! ### `lasso`
|
||||||
|
//! Using features, you can enable support for some third-party interners. The primary one is [`lasso`], a crate focused
|
||||||
|
//! on efficient interning of text strings. This is enabled via the `lasso_compat` feature and adds the necessary trait
|
||||||
|
//! implementation to make `lasso`'s interners work with `cstree` (as well as a re-export of the matching version of
|
||||||
|
//! `lasso` here). If enabled, `cstree`'s built-in interning functionality is replaced with `lasso`'s more efficient one
|
||||||
|
//! transparently, so you'll now be returned a `lasso` interner from [`new_interner`].
|
||||||
|
//!
|
||||||
|
//! ### `salsa`
|
||||||
|
//! If you are using the "2022" version of the `salsa` incremental query framework, it is possible to use its interning
|
||||||
|
//! capabilities with `cstree` as well. Support for this is experimental, and you have to opt in via the
|
||||||
|
//! `salsa_2022_compat` feature. For instructions on how to do this, and whether you actually want to, please refer to
|
||||||
|
//! [the `salsa_compat` module documentation].
|
||||||
|
//!
|
||||||
|
//! ## Multi-threaded interners
|
||||||
|
//! If you want to use your interner on more than one thread, the interner needs to support interning new text through
|
||||||
|
//! shared access. With the `multi_threaded_interning` feature, you can get such an interner by calling
|
||||||
|
//! [`new_threaded_interner`]. The feature also enables support for `ThreadedRodeo`, the multi-threaded interner from
|
||||||
|
//! `lasso`.
|
||||||
|
//!
|
||||||
|
//! **You can pass a reference to that interner to anything that expects an [`Interner`]!**
|
||||||
|
//! While the interning methods on [`Interner`] require a `&mut self` to also work for single-threaded interners, both
|
||||||
|
//! [`Resolver`] and [`Interner`] will be implemented for `&interner` if `interner` is multi-threaded:
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! # use cstree::testing::{*, Language as _};
|
||||||
|
//! # use cstree::interning::*;
|
||||||
|
//!
|
||||||
|
//! let interner = new_threaded_interner();
|
||||||
|
//! let mut builder: GreenNodeBuilder<MyLanguage, &MultiThreadedTokenInterner> =
|
||||||
|
//! GreenNodeBuilder::from_interner(&interner);
|
||||||
|
//!
|
||||||
|
//! # builder.start_node(Root);
|
||||||
|
//! # builder.token(Int, "42");
|
||||||
|
//! # builder.finish_node();
|
||||||
|
//! parse(&mut builder, "42");
|
||||||
|
//! let (tree, cache) = builder.finish();
|
||||||
|
//!
|
||||||
|
//! // Note that we get a cache and interner back, because we passed an "owned" reference to `from_interner`
|
||||||
|
//! let used_interner = cache.unwrap().into_interner().unwrap();
|
||||||
|
//! assert_eq!(used_interner as *const _, &interner as *const _);
|
||||||
|
//!
|
||||||
|
//! let int = tree.children().next().unwrap();
|
||||||
|
//! assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Here, we use `from_interner`, but pass it only a shared reference to "own". Take care to denote the type signature
|
||||||
|
//! of the `GreenNodeBuilder` appropriately.
|
||||||
|
//!
|
||||||
|
//! [crate documentation]: crate
|
||||||
|
//! [`Language::static_text`]: crate::Language::static_text
|
||||||
|
//! [`GreenNodeBuilder::token`]: crate::build::GreenNodeBuilder::token
|
||||||
|
//! [`GreenNodeBuilder::new`]: crate::build::GreenNodeBuilder::new
|
||||||
|
//! [`finish`]: crate::build::GreenNodeBuilder::finish
|
||||||
|
//! [`NodeCache::into_interner`]: crate::build::NodeCache::into_interner
|
||||||
|
//! [`SyntaxNode::new_root_with_resolver`]: crate::syntax::SyntaxNode::new_root_with_resolver
|
||||||
|
//! [`lasso`]: lasso
|
||||||
|
//! [the `salsa_compat` module documentation]: salsa_compat
|
||||||
|
|
||||||
pub use fxhash::FxBuildHasher as Hasher;
|
mod traits;
|
||||||
|
pub use self::traits::*;
|
||||||
|
|
||||||
pub use crate::green::TokenInterner;
|
mod default_interner;
|
||||||
|
|
||||||
/// The index type for all interners. Each key represents
|
#[cfg(not(feature = "lasso_compat"))]
|
||||||
pub type Key = lasso::Spur;
|
#[doc(inline)]
|
||||||
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
|
pub use default_interner::TokenInterner;
|
||||||
|
|
||||||
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
|
#[cfg(feature = "lasso_compat")]
|
||||||
/// it with `O(1)` times. By default, `Rodeo` uses an [`fxhash`] [`Hasher`].
|
mod lasso_compat;
|
||||||
pub type Rodeo<S = Hasher> = lasso::Rodeo<Key, S>;
|
|
||||||
|
|
||||||
/// Constructs a new, single-threaded interner.
|
#[cfg(feature = "lasso_compat")]
|
||||||
|
#[doc(inline)]
|
||||||
|
pub use lasso_compat::TokenInterner;
|
||||||
|
|
||||||
|
#[cfg(feature = "multi_threaded_interning")]
|
||||||
|
#[doc(inline)]
|
||||||
|
pub use lasso_compat::MultiThreadedTokenInterner;
|
||||||
|
|
||||||
|
#[cfg(feature = "lasso_compat")]
|
||||||
|
#[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
|
||||||
|
pub use lasso;
|
||||||
|
|
||||||
|
#[cfg(feature = "salsa_2022_compat")]
|
||||||
|
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
|
||||||
|
pub mod salsa_compat;
|
||||||
|
|
||||||
|
use core::fmt;
|
||||||
|
use std::num::NonZeroU32;
|
||||||
|
|
||||||
|
/// The intern key type for the source text of [`GreenToken`s](crate::green::GreenToken).
|
||||||
|
/// Each unique key uniquely identifies a deduplicated, interned source string.
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct TokenKey {
|
||||||
|
inner: NonZeroU32,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safety: we match `+ 1` and `- 1`, so it is always possible to round-trip.
|
||||||
|
unsafe impl InternKey for TokenKey {
|
||||||
|
#[inline]
|
||||||
|
fn into_u32(self) -> u32 {
|
||||||
|
self.inner.get() - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_from_u32(key: u32) -> Option<Self> {
|
||||||
|
(key < u32::MAX).then(|| Self {
|
||||||
|
// Safety: non-zero by increment.
|
||||||
|
// Overflow is impossible under the check above.
|
||||||
|
inner: unsafe { NonZeroU32::new_unchecked(key + 1) },
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for TokenKey {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.write_fmt(format_args!("TokenKey({})", self.inner))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Constructs a new, single-threaded [`Interner`](traits::Interner).
|
||||||
///
|
///
|
||||||
/// If you need the interner to be multi-threaded, see [`new_threaded_interner`].
|
/// If you need the interner to be multi-threaded, see [`new_threaded_interner`].
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new_interner() -> Rodeo {
|
pub fn new_interner() -> TokenInterner {
|
||||||
Rodeo::with_hasher(Hasher::default())
|
TokenInterner::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
|
/// Constructs a new [`Interner`](traits::Interner) that can be used across multiple threads.
|
||||||
/// it with `O(1)` times. By default, `ThreadedRodeo` uses an [`fxhash`] [`Hasher`].
|
///
|
||||||
pub type ThreadedRodeo<S = Hasher> = lasso::ThreadedRodeo<Key, S>;
|
/// Note that you can use `&MultiThreadTokenInterner` to access interning methods through a shared reference, as well as
|
||||||
|
/// construct new syntax trees. See [the module documentation](self) for more information and examples.
|
||||||
/// Constructs a new interner that can be used across multiple threads.
|
#[cfg(feature = "multi_threaded_interning")]
|
||||||
|
#[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new_threaded_interner() -> ThreadedRodeo {
|
pub fn new_threaded_interner() -> MultiThreadedTokenInterner {
|
||||||
ThreadedRodeo::with_hasher(Hasher::default())
|
MultiThreadedTokenInterner::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings, both
|
|
||||||
/// key to string resolution and string to key lookups.
|
|
||||||
///
|
|
||||||
/// The hasher is the same as the Rodeo or ThreadedRodeo that created it.
|
|
||||||
/// Can be acquired with the `into_reader` methods (see also [`IntoReader`]).
|
|
||||||
pub type RodeoReader<S = Hasher> = lasso::RodeoReader<Key, S>;
|
|
||||||
|
|
||||||
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings with
|
|
||||||
/// only key to string resolution.
|
|
||||||
///
|
|
||||||
/// Can be acquired with the `into_resolver` methods (see also [`IntoResolver`]).
|
|
||||||
pub type RodeoResolver = lasso::RodeoResolver<Key>;
|
|
||||||
pub use lasso::{Capacity, Iter, LassoError, LassoErrorKind, LassoResult, MemoryLimits, Strings};
|
|
||||||
|
|
70
src/interning/default_interner.rs
Normal file
70
src/interning/default_interner.rs
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
#![cfg(not(feature = "lasso_compat"))]
|
||||||
|
|
||||||
|
use core::fmt;
|
||||||
|
|
||||||
|
use fxhash::FxBuildHasher as Hasher;
|
||||||
|
use indexmap::IndexSet;
|
||||||
|
|
||||||
|
use super::{InternKey, Interner, Resolver, TokenKey};
|
||||||
|
|
||||||
|
/// The default [`Interner`] used to deduplicate green token strings.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct TokenInterner {
|
||||||
|
id_set: IndexSet<String, Hasher>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenInterner {
|
||||||
|
pub(in crate::interning) fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
id_set: IndexSet::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub enum InternerError {
|
||||||
|
KeySpaceExhausted,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for InternerError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
InternerError::KeySpaceExhausted => write!(f, "key space exhausted"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for InternerError {}
|
||||||
|
|
||||||
|
impl Resolver<TokenKey> for TokenInterner {
|
||||||
|
fn try_resolve(&self, key: TokenKey) -> Option<&str> {
|
||||||
|
let index = key.into_u32() as usize;
|
||||||
|
self.id_set.get_index(index).map(String::as_str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// `TokenKey` can represent `1` to `u32::MAX` (due to the `NonNull` niche), so `u32::MAX` elements.
|
||||||
|
// Set indices start at 0, so everything shifts down by 1.
|
||||||
|
const N_INDICES: usize = u32::MAX as usize;
|
||||||
|
|
||||||
|
impl Interner<TokenKey> for TokenInterner {
|
||||||
|
type Error = InternerError;
|
||||||
|
|
||||||
|
fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
|
||||||
|
if let Some(index) = self.id_set.get_index_of(text) {
|
||||||
|
let raw_key = u32::try_from(index).unwrap_or_else(|_| {
|
||||||
|
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
|
||||||
|
});
|
||||||
|
return Ok(TokenKey::try_from_u32(raw_key).unwrap_or_else(|| {
|
||||||
|
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
|
||||||
|
}));
|
||||||
|
} else if self.id_set.len() >= N_INDICES {
|
||||||
|
return Err(InternerError::KeySpaceExhausted);
|
||||||
|
}
|
||||||
|
|
||||||
|
let (index, added) = self.id_set.insert_full(text.to_string());
|
||||||
|
debug_assert!(added, "tried to intern duplicate text");
|
||||||
|
let raw_key = u32::try_from(index).unwrap_or_else(|_| panic!("interned `{index}` despite keyspace exhaustion"));
|
||||||
|
TokenKey::try_from_u32(raw_key).ok_or(InternerError::KeySpaceExhausted)
|
||||||
|
}
|
||||||
|
}
|
9
src/interning/lasso_compat.rs
Normal file
9
src/interning/lasso_compat.rs
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
//! Bridge between `cstree`'s and `lasso`'s types and traits.
|
||||||
|
|
||||||
|
#![cfg(feature = "lasso_compat")]
|
||||||
|
|
||||||
|
mod token_interner;
|
||||||
|
#[doc(inline)]
|
||||||
|
pub use token_interner::*;
|
||||||
|
|
||||||
|
mod traits;
|
109
src/interning/lasso_compat/token_interner.rs
Normal file
109
src/interning/lasso_compat/token_interner.rs
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
//! Default interner implementations based on `lasso`.
|
||||||
|
|
||||||
|
#![cfg(feature = "lasso_compat")]
|
||||||
|
|
||||||
|
use std::{hash::BuildHasher, num::NonZeroUsize};
|
||||||
|
|
||||||
|
use fxhash::FxBuildHasher as Hasher;
|
||||||
|
use lasso::{Capacity, Rodeo, ThreadedRodeo};
|
||||||
|
|
||||||
|
use crate::interning::{Interner, Resolver, TokenKey};
|
||||||
|
|
||||||
|
/// Default number of strings that the interner will initially allocate space for.
|
||||||
|
/// Value recommended by the author of `lasso`.
|
||||||
|
const DEFAULT_STRING_CAPACITY: usize = 512;
|
||||||
|
|
||||||
|
/// Default memory in bytes that the interner will initially allocate space for.
|
||||||
|
/// Value recommended by the author of `lasso`.
|
||||||
|
const DEFAULT_BYTE_CAPACITY: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4096) };
|
||||||
|
|
||||||
|
// Implements `Resolver<TokenKey>` and `Interner<TokenKey>` for an interner type by
// delegating to its `rodeo` field. The optional `if #[cfg(feature = ...)]` argument
// only attaches a rustdoc `doc(cfg)` annotation so docs show which feature gates the
// implementation; it does not change compilation.
macro_rules! impl_traits {
    (for $interner:ty $(, if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Resolver<TokenKey> for $interner {
            #[inline]
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                self.rodeo.try_resolve(&key)
            }

            #[inline]
            fn resolve(&self, key: TokenKey) -> &str {
                self.rodeo.resolve(&key)
            }
        }

        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Interner<TokenKey> for $interner {
            // Interning is delegated to `lasso`, so its error type is used directly.
            type Error = lasso::LassoError;

            #[inline]
            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                self.rodeo.try_get_or_intern(text)
            }

            #[inline]
            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                self.rodeo.get_or_intern(text)
            }
        }
    };
}
|
||||||
|
|
||||||
|
/// The default [`Interner`] used to deduplicate green token strings.
#[derive(Debug)]
pub struct TokenInterner {
    // Single-threaded `lasso` interner keyed by `TokenKey`.
    rodeo: Rodeo<TokenKey, Hasher>,
}

impl TokenInterner {
    // Creates an interner pre-sized with the crate's default string and byte capacities.
    // Restricted to the `interning` module: external users go through the public
    // construction paths instead of building one directly.
    pub(in crate::interning) fn new() -> Self {
        Self {
            rodeo: Rodeo::with_capacity_and_hasher(
                Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
                Hasher::default(),
            ),
        }
    }

    /// Returns the [`Rodeo`] backing this interner.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
    #[inline]
    pub fn into_inner(self) -> Rodeo<TokenKey, impl BuildHasher> {
        self.rodeo
    }
}

// Forward `Resolver` and `Interner` to the inner `Rodeo`.
impl_traits!(for TokenInterner);
|
||||||
|
|
||||||
|
#[cfg(feature = "multi_threaded_interning")]
pub use multi_threaded::MultiThreadedTokenInterner;

#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    /// A threadsafe [`Interner`] for deduplicating [`GreenToken`](crate::green::GreenToken) strings.
    ///
    /// Note that [`Interner`] and [`Resolver`] are also implemented for `&MultiThreadTokenInterner` so you can pass
    /// `&mut &interner` in shared contexts.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    #[derive(Debug)]
    pub struct MultiThreadedTokenInterner {
        // Thread-safe `lasso` interner keyed by `TokenKey`.
        rodeo: ThreadedRodeo<TokenKey, Hasher>,
    }

    impl MultiThreadedTokenInterner {
        // Creates an interner pre-sized with the crate's default string and byte capacities.
        // Restricted to the `interning` module, mirroring `TokenInterner::new`.
        pub(in crate::interning) fn new() -> Self {
            Self {
                rodeo: ThreadedRodeo::with_capacity_and_hasher(
                    Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
                    Hasher::default(),
                ),
            }
        }
    }

    // Implement the interning traits both for the interner itself and for shared
    // references to it; `ThreadedRodeo` supports interning through `&self`, which
    // enables the `&mut &interner` pattern mentioned in the type docs.
    impl_traits!(for MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);

    impl_traits!(for &MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);
}
|
166
src/interning/lasso_compat/traits.rs
Normal file
166
src/interning/lasso_compat/traits.rs
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
#![cfg(feature = "lasso_compat")]
|
||||||
|
|
||||||
|
use core::fmt;
|
||||||
|
use std::hash::{BuildHasher, Hash};
|
||||||
|
|
||||||
|
use crate::interning::{
|
||||||
|
traits::{InternKey, Interner, Resolver},
|
||||||
|
TokenKey,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Safety: `InternKey` has the same invariant as `lasso::Key`
// (round-tripping between the key and its raw integer form must be lossless).
unsafe impl lasso::Key for TokenKey {
    fn into_usize(self) -> usize {
        // A `u32` always fits into `usize` on supported platforms, so this cast is lossless.
        self.into_u32() as usize
    }

    fn try_from_usize(int: usize) -> Option<Self> {
        // First reject values outside the `u32` range, then defer to
        // `TokenKey`'s own validation of the raw value.
        let raw_key = u32::try_from(int).ok()?;
        Self::try_from_u32(raw_key)
    }
}
|
||||||
|
|
||||||
|
/// Errors that can arise when using a `lasso` interner with `cstree`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LassoCompatError {
    /// An error raised by `lasso` itself during interning.
    LassoError(lasso::LassoError),
    /// A key produced by `lasso` could not be represented as a `TokenKey`.
    KeyConversionError { lasso_key: usize },
}

impl From<lasso::LassoError> for LassoCompatError {
    #[inline]
    fn from(error: lasso::LassoError) -> Self {
        Self::LassoError(error)
    }
}

impl fmt::Display for LassoCompatError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Defer to `lasso`'s own error message.
            LassoCompatError::LassoError(lasso_error) => write!(f, "{lasso_error}"),
            LassoCompatError::KeyConversionError { lasso_key } => write!(
                f,
                "invalid key: failed to convert `lasso::Key` `{lasso_key}` to `InternKey`"
            ),
        }
    }
}

impl std::error::Error for LassoCompatError {}
|
||||||
|
|
||||||
|
// Implements `cstree`'s `Resolver<TokenKey>` for a read-capable `lasso` type by
// converting the `TokenKey` into the external `lasso::Key` type and delegating.
// Extra `where` bounds and a rustdoc `doc(cfg(feature))` annotation can be passed
// through the optional macro arguments.
macro_rules! compat_resolver {
    ($resolver:ident<K$(, $hasher:ident)?> $(where $($t:ident : $bound:ident),+)? $(if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl<K$(, $hasher)?> Resolver<TokenKey> for lasso::$resolver<K$(, $hasher)?>
        where
            K: lasso::Key,
            $($($t: $bound),+)?
        {
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                let raw_key = TokenKey::into_u32(key);
                // If the key doesn't fit the external key type, resolution simply fails.
                let lasso_key = K::try_from_usize(raw_key as usize)?;
                <Self as lasso::Resolver<K>>::try_resolve(self, &lasso_key)
            }

            fn resolve(&self, key: TokenKey) -> &str {
                let raw_key = TokenKey::into_u32(key);
                // Panic with a descriptive message on conversion failure. Using
                // `unwrap_or_else` + `panic!` instead of `expect(&format!(..))` keeps
                // the message formatting off the success path (clippy: expect_fun_call).
                let lasso_key = K::try_from_usize(raw_key as usize)
                    .unwrap_or_else(|| panic!("invalid key: failed to convert `{key:?}` to `lasso::Key`"));
                <Self as lasso::Resolver<K>>::resolve(self, &lasso_key)
            }
        }
    };
}
|
||||||
|
|
||||||
|
// Implements `cstree`'s `Interner<TokenKey>` for a write-capable `lasso` interner.
// Keys returned by `lasso` are mapped back into `TokenKey`'s `u32`-based keyspace;
// keys that don't fit surface as `LassoCompatError::KeyConversionError`.
macro_rules! compat_interner {
    ($interner:ident<K, S> $(where $($t:ident : $bound:ident),+)? if #[cfg(feature = $feature:literal)]) => {
        #[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))]
        impl<K, S> Interner<TokenKey> for lasso::$interner<K, S>
        where
            K: lasso::Key,
            S: BuildHasher,
            $($($t: $bound),+)?
        {
            type Error = LassoCompatError;

            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                // `?` forwards `lasso` errors via `From<lasso::LassoError>`.
                let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
                let raw_key = K::into_usize(lasso_key);
                // Convert the external key back into `TokenKey`'s keyspace.
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
            }

            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
                let raw_key = K::into_usize(lasso_key);
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                    .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
            }
        }
    };
}
|
||||||
|
|
||||||
|
// Read-only `lasso` types only get `Resolver` implementations.
compat_resolver!(RodeoReader<K, S> if #[cfg(feature = "lasso_compat")]);
compat_resolver!(RodeoResolver<K> if #[cfg(feature = "lasso_compat")]);

// `Rodeo` can both intern new strings and resolve existing keys.
compat_resolver!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
compat_interner!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
|
||||||
|
|
||||||
|
#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    // `ThreadedRodeo` needs the extra `Hash`/`Clone` bounds that `lasso` itself requires.
    compat_resolver!(ThreadedRodeo<K, S> where K: Hash, S: BuildHasher, S: Clone if #[cfg(feature = "multi_threaded_interning")]);

    compat_interner!(ThreadedRodeo<K, S> where K: Hash, S: Clone if #[cfg(feature = "multi_threaded_interning")]);

    // `ThreadedRodeo` supports interning through a shared reference, so the traits
    // are also implemented for `&ThreadedRodeo`, delegating to the impls above.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Resolver<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        #[inline]
        fn try_resolve(&self, key: TokenKey) -> Option<&str> {
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::try_resolve(self, key)
        }

        #[inline]
        fn resolve(&self, key: TokenKey) -> &str {
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::resolve(self, key)
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Interner<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        // Reuse the error type of the by-value implementation.
        type Error = <lasso::ThreadedRodeo<K, S> as Interner<TokenKey>>::Error;

        fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
            // Interning goes through `lasso`'s `&ThreadedRodeo` impl; the resulting
            // key is converted back into `TokenKey`'s `u32`-based keyspace.
            let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
            let raw_key = K::into_usize(lasso_key);
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
        }

        fn get_or_intern(&mut self, text: &str) -> TokenKey {
            let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
            let raw_key = K::into_usize(lasso_key);
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
        }
    }
}
|
228
src/interning/salsa_compat.rs
Normal file
228
src/interning/salsa_compat.rs
Normal file
|
@ -0,0 +1,228 @@
|
||||||
|
//! # Using a `salsa` database as the interner for `cstree`
|
||||||
|
//!
|
||||||
|
//! <p
|
||||||
|
//! style="background:rgba(255,181,77,0.16);padding:0.75em;white-space:normal;font:inherit;">
|
||||||
|
//! <strong>Warning</strong>: Compatibility is only provided for "Salsa 2022".
|
||||||
|
//! This version is currently under active development and <code style="background:rgba(41,24,0,0.9);">cstree</code>'s
|
||||||
|
//! compatibility features are unstable until there is an official
|
||||||
|
//! release.
|
||||||
|
//! Older versions of `salsa` are not supported.
|
||||||
|
//! </p>
|
||||||
|
//!
|
||||||
|
//! If you are using the `salsa` query system, you already have access to an implementation of interning through
|
||||||
|
//! [`#[salsa::interned]`](macro@salsa::interned). This is all that is needed to use `cstree` and this module provides
|
||||||
|
//! the utilities needed to use `salsa`'s interners for working with syntax trees.
|
||||||
|
//!
|
||||||
|
//! Note that the primary benefit of this is that it avoids additional dependencies because it uses an interner that you
|
||||||
|
//! already depend on, but it can also be beneficial to use an interner that is more specialized towards string
|
||||||
|
//! interning. In particular, using `salsa`'s interning requires allocating all strings that are interned even if they
|
||||||
|
//! are deduplicated because they already exist in the interner.
|
||||||
|
//!
|
||||||
|
//! ## How to do it
|
||||||
|
//!
|
||||||
|
//! ```
|
||||||
|
//! # use cstree::testing::*;
|
||||||
|
//! # use cstree::interning::salsa_compat::salsa;
|
||||||
|
//! # use cstree::impl_cstree_interning_for_salsa;
|
||||||
|
//! // Define the `salsa` jar, database and intern Id
|
||||||
|
//! #[salsa::jar(db = Db)]
|
||||||
|
//! pub struct Jar(SourceId);
|
||||||
|
//!
|
||||||
|
//! pub trait Db: salsa::DbWithJar<Jar> {}
|
||||||
|
//! impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
||||||
|
//!
|
||||||
|
//! // If you are not a doctest and can put `Jar` at the root of your crate,
|
||||||
|
//! // this can just be `#[salsa::interned]`.
|
||||||
|
//! #[salsa::interned(jar = Jar)]
|
||||||
|
//! pub struct SourceId {
|
||||||
|
//! #[return_ref]
|
||||||
|
//! pub text: String,
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! #[derive(Default)]
|
||||||
|
//! #[salsa::db(Jar)]
|
||||||
|
//! struct Database {
|
||||||
|
//! storage: salsa::Storage<Self>,
|
||||||
|
//! }
|
||||||
|
//! impl salsa::Database for Database {}
|
||||||
|
//!
|
||||||
|
//! // Let `cstree` define a conversion trait and implement it for your database.
|
||||||
|
//! // `Database` is your db type, `SourceId` is your interning id, and `text` is
|
||||||
|
//! // its text field (all as defined above).
|
||||||
|
//! impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
|
||||||
|
//!
|
||||||
|
//! // Build a tree with the `salsa` interner
|
||||||
|
//! let db = Database::default();
|
||||||
|
//! let interner = db.as_interner(); // <-- conversion happens here
|
||||||
|
//! let mut shared_interner = &interner;
|
||||||
|
//! let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
|
||||||
|
//! let (tree, _no_interner_because_it_was_borrowed) = {
|
||||||
|
//! builder.start_node(TestSyntaxKind::Plus);
|
||||||
|
//! builder.token(TestSyntaxKind::Float, "2.05");
|
||||||
|
//! builder.token(TestSyntaxKind::Whitespace, " ");
|
||||||
|
//! builder.token(TestSyntaxKind::Plus, "+");
|
||||||
|
//! builder.token(TestSyntaxKind::Whitespace, " ");
|
||||||
|
//! builder.token(TestSyntaxKind::Float, "7.32");
|
||||||
|
//! builder.finish_node();
|
||||||
|
//! builder.finish()
|
||||||
|
//! };
|
||||||
|
//! let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
|
||||||
|
//! assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! The full code is also available in the `salsa` example.
|
||||||
|
//!
|
||||||
|
//! ## Working with `InternWithDb` directly
|
||||||
|
//! If you don't want the trait, or macros, or if you just need more control about what happens during interning and
|
||||||
|
//! resolution, you can skip using [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa) and use
|
||||||
|
//! [`InternWithDb`] directly.
|
||||||
|
//!
|
||||||
|
//! Because `salsa` generates inherent methods (and not, for example, a trait implementation), we need information about
|
||||||
|
//! the used interning id either way. All that `as_interner` does is construct an instance of `InternWithDb` that uses
|
||||||
|
//! the generated methods to invoke `salsa`s interner. The implementation expands to
|
||||||
|
//! ```text
|
||||||
|
//! InternWithDb::new(
|
||||||
|
//! db,
|
||||||
|
//! |db, text| SourceId::new(db, text),
|
||||||
|
//! |db, id| id.text(db),
|
||||||
|
//! )
|
||||||
|
//! ```
|
||||||
|
//! but you may provide any function that doesn't capture.
|
||||||
|
|
||||||
|
#![cfg(feature = "salsa_2022_compat")]
|
||||||
|
|
||||||
|
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
|
||||||
|
pub use salsa;
|
||||||
|
|
||||||
|
use core::fmt;
|
||||||
|
|
||||||
|
use super::{InternKey, Interner, Resolver, TokenKey};
|
||||||
|
|
||||||
|
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
impl salsa::AsId for TokenKey {
    // Convert this key into a `salsa::Id` via the shared `u32` representation.
    fn as_id(self) -> salsa::Id {
        salsa::Id::from_u32(self.into_u32())
    }

    /// Create an instance of the intern-key from an ID.
    ///
    /// # Panics
    /// Panics if the given `id` from `salsa` cannot be represented by a [`TokenKey`].
    fn from_id(id: salsa::Id) -> Self {
        TokenKey::try_from_u32(id.as_u32())
            .unwrap_or_else(|| panic!("`salsa::Id` is invalid for `TokenKey`'s keyspace: {id:?}"))
    }
}
|
||||||
|
|
||||||
|
/// Generates an extension trait `SalsaAsInterner` that lets you call `db.as_interner()` on your [`salsa::Database`] to
/// obtain a `cstree` compatible [`Interner`].
///
/// The `as_interner` method returns an instance of [`InternWithDb`] that uses the functions generated by `salsa` for
/// your Id type to perform interning and resolution.
///
/// If you have defined your interned text as
/// ```ignore
/// #[salsa::interned]
/// pub struct SourceId {
///     #[return_ref]
///     pub text: String,
/// }
/// ```
/// the syntax is
/// ```ignore
/// impl_cstree_interning_for_salsa!(impl Interning for YourDatabase => text as SourceId);
/// ```
/// where `text` is the name of the interned field.
/// Note that the use of `#[return_ref]` is required.
#[macro_export]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
macro_rules! impl_cstree_interning_for_salsa {
    (impl Interning for $db:ty => $name:ident as $id:ty) => {
        trait SalsaAsInterner {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id>;
        }

        // Implement the trait for the database type the caller passed in (`$db`)
        // rather than a hard-coded `Database`, so the macro works regardless of
        // what the user named their database type.
        impl SalsaAsInterner for $db {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id> {
                ::cstree::interning::salsa_compat::InternWithDb::new(
                    self,
                    // Interning: create (or reuse) the salsa-interned id for `text`.
                    |db, text| <$id>::new(db, text),
                    // Resolution: look up the interned text for `id`
                    // (relies on the field being declared `#[return_ref]`).
                    |db, id| id.$name(db),
                )
            }
        }
    };
}
|
||||||
|
|
||||||
|
/// This type allows you to wrap access to a [`salsa::Database`] together with an interning and a lookup function, which
/// makes it implement [`Interner`] and [`Resolver`]. The [module documentation](self) shows how to use this with your
/// own database, or you can use [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa).
///
/// The interning traits are also implemented by `&InternWithDb`, as the `salsa` database supports interning through
/// shared references (see also [the `interning` module documentation](super)).
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub struct InternWithDb<'db, Db: salsa::Database, Id: salsa::interned::InternedId> {
    // The `salsa` database that owns the interned strings.
    db: &'db Db,
    // Creates (or reuses) the interning id for a string; typically `Id::new`.
    intern: fn(&Db, text: String) -> Id,
    // Resolves an id back to its text; requires the interned field to be `#[return_ref]`.
    lookup: fn(&Db, Id) -> &str,
}

// Manual `Debug`: `Db` is not required to be `Debug` and the function pointers carry
// no useful information, so only the type name is printed.
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> fmt::Debug for InternWithDb<'db, Db, Id> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("InternWithDb")
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> InternWithDb<'db, Db, Id> {
    /// Create an [`Interner`] that works with `cstree` but uses the given `db` from `salsa`.
    /// To do this, you need to provide a function for interning new strings that creates the [`InternedId`] that you
    /// defined with [`#[salsa::interned]`](macro@salsa::interned), and a second one that resolves an Id using your
    /// database. See the [module documentation](self) for an example.
    ///
    /// [`InternedId`]: salsa::interned::InternedId
    pub fn new(db: &'db Db, intern: fn(&Db, text: String) -> Id, lookup: fn(&Db, Id) -> &str) -> Self {
        Self { db, intern, lookup }
    }
}
|
||||||
|
|
||||||
|
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for InternWithDb<'db, Db, Id> {
    // Always returns `Some`: key validation happens inside `Id::from_id`, which
    // panics on out-of-range keys rather than reporting failure here.
    // NOTE(review): assumes `lookup` itself panics (rather than returning) for
    // unknown ids inside salsa — confirm against the salsa API.
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        let key = Id::from_id(key.as_id());
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for InternWithDb<'db, Db, Id> {
    // Interning through salsa cannot fail from `cstree`'s point of view.
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        // Salsa's intern function takes an owned `String`, so the text is
        // allocated even when it was already interned (see module docs).
        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}

// Salsa supports interning through a shared database reference, so the traits are
// also implemented for `&InternWithDb`, enabling the `&mut &interner` pattern.
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for &InternWithDb<'db, Db, Id> {
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        let key = Id::from_id(key.as_id());
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for &InternWithDb<'db, Db, Id> {
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}
|
67
src/interning/traits.rs
Normal file
67
src/interning/traits.rs
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
use core::fmt;
|
||||||
|
|
||||||
|
use super::TokenKey;
|
||||||
|
|
||||||
|
/// Common interface for all intern keys via conversion to and from `u32`.
///
/// # Safety
/// Implementations must guarantee that keys can round-trip in both directions: going from `Self` to `u32` to `Self` and
/// going from `u32` to `Self` to `u32` must each yield the original value.
pub unsafe trait InternKey: Copy + Eq + fmt::Debug {
    /// Convert `self` into its raw representation.
    fn into_u32(self) -> u32;

    /// Try to reconstruct an intern key from its raw representation.
    /// Returns `None` if `key` is not a valid key.
    fn try_from_u32(key: u32) -> Option<Self>;
}
|
||||||
|
|
||||||
|
/// The read-only part of an interner.
/// Allows to perform lookups of intern keys to resolve them to their interned text.
pub trait Resolver<Key: InternKey = TokenKey> {
    /// Tries to resolve the given `key` and return its interned text.
    ///
    /// If `self` does not contain any text for `key`, `None` is returned.
    fn try_resolve(&self, key: Key) -> Option<&str>;

    /// Resolves `key` to its interned text.
    ///
    /// # Panics
    /// Panics if there is no text for `key`.
    ///
    /// Compatibility implementations for interners from other crates may also panic if `key` cannot be converted to the
    /// key type of the external interner. Please ensure you configure any external interners appropriately (for
    /// example by choosing an appropriately sized key type).
    fn resolve(&self, key: Key) -> &str {
        // Default implementation in terms of `try_resolve`; implementors may
        // override it with a more direct lookup.
        self.try_resolve(key)
            .unwrap_or_else(|| panic!("failed to resolve `{key:?}`"))
    }
}
|
||||||
|
|
||||||
|
/// A full interner, which can intern new strings returning intern keys and also resolve intern keys to the interned
/// value.
///
/// **Note:** Because single-threaded interners may require mutable access, the methods on this trait take `&mut self`.
/// In order to use a multi- (or single)-threaded interner that allows access through a shared reference, it is
/// implemented for `&`[`MultiThreadedTokenInterner`](crate::interning::MultiThreadedTokenInterner), allowing it to be
/// used with a `&mut &MultiThreadTokenInterner`.
pub trait Interner<Key: InternKey = TokenKey>: Resolver<Key> {
    /// Represents possible ways in which interning may fail.
    /// For example, this might be running out of fresh intern keys, or failure to allocate sufficient space for a new
    /// value.
    type Error;

    /// Interns `text` and returns a new intern key for it.
    /// If `text` was already previously interned, it will not be used and the existing intern key for its value will be
    /// returned.
    fn try_get_or_intern(&mut self, text: &str) -> Result<Key, Self::Error>;

    /// Interns `text` and returns a new intern key for it.
    ///
    /// # Panics
    /// Panics if the internment process raises an [`Error`](Interner::Error).
    fn get_or_intern(&mut self, text: &str) -> Key {
        // Default implementation in terms of the fallible method.
        self.try_get_or_intern(text)
            .unwrap_or_else(|_| panic!("failed to intern `{text:?}`"))
    }
}
|
405
src/lib.rs
405
src/lib.rs
|
@ -2,7 +2,7 @@
|
||||||
//! "Traditional" abstract syntax trees (ASTs) usually contain different types of nodes which represent information
|
//! "Traditional" abstract syntax trees (ASTs) usually contain different types of nodes which represent information
|
||||||
//! about the source text of a document and reduce this information to the minimal amount necessary to correctly
|
//! about the source text of a document and reduce this information to the minimal amount necessary to correctly
|
||||||
//! interpret it. In contrast, CSTs are lossless representations of the entire input where all tree nodes are
|
//! interpret it. In contrast, CSTs are lossless representations of the entire input where all tree nodes are
|
||||||
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`SyntaxKind`] field to determine the kind of
|
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`RawSyntaxKind`] field to determine the kind of
|
||||||
//! node.
|
//! node.
|
||||||
//! One of the big advantages of this representation is not only that it can recreate the original source exactly, but
|
//! One of the big advantages of this representation is not only that it can recreate the original source exactly, but
|
||||||
//! also that it lends itself very well to the representation of _incomplete or erroneous_ trees and is thus very suited
|
//! also that it lends itself very well to the representation of _incomplete or erroneous_ trees and is thus very suited
|
||||||
|
@ -35,41 +35,385 @@
|
||||||
//! references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
//! references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
||||||
//!
|
//!
|
||||||
//! ## Getting Started
|
//! ## Getting Started
|
||||||
//! The main entry points for constructing syntax trees are [`GreenNodeBuilder`] and [`SyntaxNode::new_root`] for green
|
//! If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
|
||||||
//! and red trees respectively. See `examples/s_expressions.rs` for a guided tutorial to `cstree`.
|
//! concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
|
||||||
|
//! to happen to go from input text to a `cstree` syntax tree:
|
||||||
|
//!
|
||||||
|
//! 1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression") that you want to
|
||||||
|
//! have in your syntax and implement [`Language`]
|
||||||
|
//!
|
||||||
|
//! 2. Create a [`GreenNodeBuilder`](build::GreenNodeBuilder) and call
|
||||||
|
//! [`start_node`](build::GreenNodeBuilder::start_node), [`token`](build::GreenNodeBuilder::token) and
|
||||||
|
//! [`finish_node`](build::GreenNodeBuilder::finish_node) from your parser
|
||||||
|
//!
|
||||||
|
//! 3. Call [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root) or
|
||||||
|
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) with the resulting
|
||||||
|
//! [`GreenNode`](green::GreenNode) to obtain a syntax tree that you can traverse
|
||||||
|
//!
|
||||||
|
//! Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
|
||||||
|
//! We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
|
||||||
|
//! compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
|
||||||
|
//!
|
||||||
|
//! ### Defining the language
|
||||||
|
//!
|
||||||
|
//! First, we need to list the different part of our language's grammar.
|
||||||
|
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||||
|
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
||||||
|
//! representation.
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
//! #[repr(u16)]
|
||||||
|
//! enum SyntaxKind {
|
||||||
|
//! /* Tokens */
|
||||||
|
//! Int, // 42
|
||||||
|
//! Plus, // +
|
||||||
|
//! Minus, // -
|
||||||
|
//! LParen, // (
|
||||||
|
//! RParen, // )
|
||||||
|
//! /* Nodes */
|
||||||
|
//! Expr,
|
||||||
|
//! Root,
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
|
||||||
|
//! We only really need one type of node; expressions.
|
||||||
|
//! Our syntax tree's root node will have the special kind `Root`, all other nodes will be
|
||||||
|
//! expressions containing a sequence of arithmetic operations potentially involving further, nested
|
||||||
|
//! expression nodes.
|
||||||
|
//!
|
||||||
|
//! To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
|
||||||
|
//! `#[repr(u16)]` that we added) by implementing the [`Language`] trait. We can also tell `cstree` about tokens that
|
||||||
|
//! always have the same text through the `static_text` method on the trait. This is useful for the operators and
|
||||||
|
//! parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
|
||||||
|
//! other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
//! pub struct Calculator;
|
||||||
|
//! impl Language for Calculator {
|
||||||
|
//! // The tokens and nodes we just defined
|
||||||
|
//! type Kind = SyntaxKind;
|
||||||
|
//!
|
||||||
|
//! fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
|
//! // This just needs to be the inverse of `kind_to_raw`, but could also
|
||||||
|
//! // be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||||
|
//! match raw.0 {
|
||||||
|
//! 0 => SyntaxKind::Int,
|
||||||
|
//! 1 => SyntaxKind::Plus,
|
||||||
|
//! 2 => SyntaxKind::Minus,
|
||||||
|
//! 3 => SyntaxKind::LParen,
|
||||||
|
//! 4 => SyntaxKind::RParen,
|
||||||
|
//! 5 => SyntaxKind::Expr,
|
||||||
|
//! 6 => SyntaxKind::Root,
|
||||||
|
//! n => panic!("Unknown raw syntax kind: {n}"),
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
|
//! RawSyntaxKind(kind as u16)
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
//! match kind {
|
||||||
|
//! SyntaxKind::Plus => Some("+"),
|
||||||
|
//! SyntaxKind::Minus => Some("-"),
|
||||||
|
//! SyntaxKind::LParen => Some("("),
|
||||||
|
//! SyntaxKind::RParen => Some(")"),
|
||||||
|
//! _ => None,
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ### Parsing into a green tree
|
||||||
|
//! With that out of the way, we can start writing the parser for our expressions.
|
||||||
|
//! For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
|
||||||
|
//! tokens:
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! #[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
//! pub enum Token<'input> {
|
||||||
|
//! // Note that number strings are not yet parsed into actual numbers,
|
||||||
|
//! // we just remember the slice of the input that contains their digits
|
||||||
|
//! Int(&'input str),
|
||||||
|
//! Plus,
|
||||||
|
//! Minus,
|
||||||
|
//! LParen,
|
||||||
|
//! RParen,
|
||||||
|
//! // A special token that indicates that we have reached the end of the file
|
||||||
|
//! EoF,
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
|
||||||
|
//! combination of `cstree` and the actual parser, which we define like this:
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! pub struct Parser<'input> {
|
||||||
|
//! // `Peekable` is a standard library iterator adapter that allows
|
||||||
|
//! // looking ahead at the next item without removing it from the iterator yet
|
||||||
|
//! lexer: Peekable<Lexer<'input>>,
|
||||||
|
//! builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! impl<'input> Parser<'input> {
|
||||||
|
//! pub fn new(input: &'input str) -> Self {
|
||||||
|
//! Self {
|
||||||
|
//! // we get `peekable` from implementing `Iterator` on `Lexer`
|
||||||
|
//! lexer: Lexer::new(input).peekable(),
|
||||||
|
//! builder: GreenNodeBuilder::new(),
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||||
|
//! self.lexer.next()
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes
|
||||||
|
//! for all element in the language grammar will have the same type: [`GreenNode`](green::GreenNode)
|
||||||
|
//! for the inner ("green") tree and [`SyntaxNode`](syntax::SyntaxNode) for the outer ("red") tree.
|
||||||
|
//! Different kinds of nodes (and tokens) are differentiated by their `SyntaxKind` tag, which we defined above.
|
||||||
|
//!
|
||||||
|
//! You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
|
||||||
|
//! a typical recursive descent parser. With a more traditional AST, one would define different AST
|
||||||
|
//! structs for struct or function definitions, statements, expressions and so on. Inside the
|
||||||
|
//! parser, the components of any element, such as all fields of a struct or all statements inside a
|
||||||
|
//! function, are parsed first and then the parser wraps them in the matching AST type, which is
|
||||||
|
//! returned from the corresponding parser function.
|
||||||
|
//!
|
||||||
|
//! Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the
|
||||||
|
//! parser would build. Instead, parsing into a CST using the
|
||||||
|
//! [`GreenNodeBuilder`](build::GreenNodeBuilder) follows the source code more closely in that you
|
||||||
|
//! tell `cstree` about each new element you enter and all tokens that the parser consumes. So, for
|
||||||
|
//! example, to parse a struct definition the parser first "enters" the struct definition node, then
|
||||||
|
//! parses the `struct` keyword and type name, then parses each field, and finally "finishes"
|
||||||
|
//! parsing the struct node.
|
||||||
|
//!
|
||||||
|
//! The most trivial example is the root node for our parser, which just creates a root node
|
||||||
|
//! containing the whole expression (we could do without a specific root node if any expression was
|
||||||
|
//! a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! pub fn parse(&mut self) -> Result<(), String> {
|
||||||
|
//! self.builder.start_node(SyntaxKind::Root);
|
||||||
|
//! self.parse_expr()?;
|
||||||
|
//! self.builder.finish_node();
|
||||||
|
//! Ok(())
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! As there isn't a static AST type to return, the parser is very flexible as to what is part of a
|
||||||
|
//! node. In the previous example, if the user is adding a new field to the struct and has not yet
|
||||||
|
//! typed the field's type, the CST node for the struct doesn't care if there is no child node for
|
||||||
|
//! it. Similarly, if the user is deleting fields and the source code currently contains a leftover
|
||||||
|
//! field name, this additional identifier can be a part of the struct node without any
|
||||||
|
//! modifications to the syntax tree definition. This property is the key to why CSTs are such a
|
||||||
|
//! good fit as a lossless input representation, which necessitates the syntax tree to mirror the
|
||||||
|
//! user-specific layout of whitespace and comments around the AST items.
|
||||||
|
//!
|
||||||
|
//! In the parser for our simple expression language, we'll also have to deal with the fact that,
|
||||||
|
//! when we see a number the parser doesn't yet know whether there will be additional operations
|
||||||
|
//! following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
|
||||||
|
//! a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
|
||||||
|
//! however, implies that when reaching the `+`, the parser would have to have already entered an
|
||||||
|
//! expression node in order for the whole input to be part of the expression.
|
||||||
|
//!
|
||||||
|
//! To get around this, `GreenNodeBuilder` provides the
|
||||||
|
//! [`checkpoint`](build::GreenNodeBuilder::checkpoint) method, which we can call to "remember" the
|
||||||
|
//! current position in the input. For example, we can create a checkpoint before the parser parses
|
||||||
|
//! the first `1`. Later, when it sees the following `+`, it can create an `Expr` node for the
|
||||||
|
//! whole expression using [`start_node_at`](build::GreenNodeBuilder::start_node_at):
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! fn parse_lhs(&mut self) -> Result<(), String> {
|
||||||
|
//! // An expression may start either with a number, or with an opening parenthesis that is
|
||||||
|
//! // the start of a parenthesized expression
|
||||||
|
//! let next_token = *self.lexer.peek().unwrap();
|
||||||
|
//! match next_token {
|
||||||
|
//! Token::Int(n) => {
|
||||||
|
//! self.bump();
|
||||||
|
//! self.builder.token(SyntaxKind::Int, n);
|
||||||
|
//! }
|
||||||
|
//! Token::LParen => {
|
||||||
|
//! // Wrap the grouped expression inside a node containing it and its parentheses
|
||||||
|
//! self.builder.start_node(SyntaxKind::Expr);
|
||||||
|
//! self.bump();
|
||||||
|
//! self.builder.static_token(SyntaxKind::LParen);
|
||||||
|
//! self.parse_expr()?; // Inner expression
|
||||||
|
//! if self.bump() != Some(Token::RParen) {
|
||||||
|
//! return Err("Missing ')'".to_string());
|
||||||
|
//! }
|
||||||
|
//! self.builder.static_token(SyntaxKind::RParen);
|
||||||
|
//! self.builder.finish_node();
|
||||||
|
//! }
|
||||||
|
//! Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||||
|
//! t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||||
|
//! }
|
||||||
|
//! Ok(())
|
||||||
|
//! }
|
||||||
|
//!
|
||||||
|
//! fn parse_expr(&mut self) -> Result<(), String> {
|
||||||
|
//! // Remember our current position
|
||||||
|
//! let before_expr = self.builder.checkpoint();
|
||||||
|
//!
|
||||||
|
//! // Parse the start of the expression
|
||||||
|
//! self.parse_lhs()?;
|
||||||
|
//!
|
||||||
|
//! // Check if the expression continues with `+ <more>` or `- <more>`
|
||||||
|
//! let Some(next_token) = self.lexer.peek() else {
|
||||||
|
//! return Ok(());
|
||||||
|
//! };
|
||||||
|
//! let op = match *next_token {
|
||||||
|
//! Token::Plus => SyntaxKind::Plus,
|
||||||
|
//! Token::Minus => SyntaxKind::Minus,
|
||||||
|
//! Token::RParen | Token::EoF => return Ok(()),
|
||||||
|
//! t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||||
|
//! };
|
||||||
|
//!
|
||||||
|
//! // If so, retroactively wrap the (already parsed) LHS and the following RHS
|
||||||
|
//! // inside an `Expr` node
|
||||||
|
//! self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||||
|
//! self.bump();
|
||||||
|
//! self.builder.static_token(op);
|
||||||
|
//! self.parse_expr()?; // RHS
|
||||||
|
//! self.builder.finish_node();
|
||||||
|
//! Ok(())
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ### Obtaining the parser result
|
||||||
|
//!
|
||||||
|
//! Our parser is now capable of parsing our little arithmetic language, but it's methods don't
|
||||||
|
//! return anything. So how do we get our syntax tree out? The answer lies in
|
||||||
|
//! [`GreenNodeBuilder::finish`](build::GreenNodeBuilder::finish), which finally returns the tree
|
||||||
|
//! that we have painstakingly constructed.
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! impl Parser<'_> {
|
||||||
|
//! pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||||
|
//! assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||||
|
//! let (tree, cache) = self.builder.finish();
|
||||||
|
//! (tree, cache.unwrap().into_interner().unwrap())
|
||||||
|
//! }
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! `finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use
|
||||||
|
//! it for parsing related inputs (e.g., different source files from the same crate may share a lot
|
||||||
|
//! of common function and type names that can be deduplicated). See `GreenNodeBuilder`'s
|
||||||
|
//! documentation for more information on this, in particular the `with_cache` and `from_cache`
|
||||||
|
//! methods. Most importantly for us, we can extract the [`Interner`](interning::Interner) that
|
||||||
|
//! contains the source text of the tree's tokens from the cache, which we need if we want to look
|
||||||
|
//! up things like variable names or the value of numbers for our calculator.
|
||||||
|
//!
|
||||||
|
//! To work with the syntax tree, you'll want to upgrade it to a [`SyntaxNode`](syntax::SyntaxNode)
|
||||||
|
//! using [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root). You can also use
|
||||||
|
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) to combine
|
||||||
|
//! tree and interner, which lets you directly retrieve source text and makes the nodes implement
|
||||||
|
//! `Display` and `Debug`. The same output can be produced from `SyntaxNode`s by calling the
|
||||||
|
//! `debug` or `display` method with a [`Resolver`](interning::Resolver). To visualize the whole
|
||||||
|
//! syntax tree, pass `true` for the `recursive` parameter on `debug`, or simply debug-print a
|
||||||
|
//! [`ResolvedNode`](syntax::ResolvedNode):
|
||||||
|
//!
|
||||||
|
//! ```rust,ignore
|
||||||
|
//! let input = "11 + 2-(5 + 4)";
|
||||||
|
//! let mut parser = Parser::new(input);
|
||||||
|
//! parser.parse().unwrap();
|
||||||
|
//! let (tree, interner) = parser.finish();
|
||||||
|
//! let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||||
|
//! dbg!(root);
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! ### Further examples
|
||||||
|
//! The parser we just built is available in full in the runnable `readme` example, which includes some additional code
|
||||||
|
//! to read expressions from the terminal and evaluate the parsed expressions - have it do a few calculations if you
|
||||||
|
//! like.
|
||||||
|
//! There are several more examples in the `examples/` folder in the repository.
|
||||||
|
//! A good starting point is the `s_expressions` example, which implements a parser for a small S-Expression language
|
||||||
|
//! with guiding comments.
|
||||||
//!
|
//!
|
||||||
//! ## AST Layer
|
//! ## AST Layer
|
||||||
//! While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or
|
//! While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or
|
||||||
//! an AST representation, or freely switch between them. To do so, use `cstree` to build syntax and underlying green
|
//! an AST representation, or freely switch between them. To do so, use `cstree` to build syntax and underlying green
|
||||||
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs) and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs) (note that the latter file is automatically generated by a task).
|
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs)
|
||||||
|
//! and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs)
|
||||||
|
//! (note that the latter file is automatically generated by a task using [`ungrammar`](https://crates.io/crates/ungrammar)).
|
||||||
|
|
||||||
#![forbid(missing_debug_implementations, unconditional_recursion)]
|
#![forbid(missing_debug_implementations, unconditional_recursion)]
|
||||||
#![deny(unsafe_code, missing_docs, future_incompatible)]
|
#![deny(unsafe_code, future_incompatible)]
|
||||||
#![allow(unstable_name_collisions)] // strict provenance - must come after `future_incompatible` to take precedence
|
#![allow(unstable_name_collisions)] // strict provenance - must come after `future_incompatible` to take precedence
|
||||||
|
#![warn(missing_docs)]
|
||||||
|
// Docs.rs
|
||||||
|
#![doc(html_root_url = "https://docs.rs/cstree/0.12.0-rc.0")]
|
||||||
|
#![cfg_attr(doc_cfg, feature(doc_cfg))]
|
||||||
|
|
||||||
#[allow(unsafe_code)]
|
#[allow(unsafe_code)]
|
||||||
mod green;
|
pub mod green;
|
||||||
#[allow(unsafe_code)]
|
#[allow(unsafe_code)]
|
||||||
mod syntax;
|
pub mod syntax;
|
||||||
|
|
||||||
|
#[allow(unsafe_code)]
|
||||||
|
pub mod interning;
|
||||||
|
|
||||||
#[cfg(feature = "serialize")]
|
#[cfg(feature = "serialize")]
|
||||||
mod serde_impls;
|
mod serde_impls;
|
||||||
#[allow(missing_docs)]
|
#[allow(missing_docs)]
|
||||||
mod utility_types;
|
mod utility_types;
|
||||||
|
|
||||||
pub mod interning;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
// Reexport types for working with strings.
|
/// `RawSyntaxKind` is a type tag for each token or node.
|
||||||
pub use text_size::{TextLen, TextRange, TextSize};
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct RawSyntaxKind(pub u16);
|
||||||
|
|
||||||
#[doc(inline)]
|
/// Typesafe representations of text ranges and sizes.
|
||||||
pub use crate::syntax::*;
|
pub mod text {
|
||||||
pub use crate::{
|
pub use crate::syntax::SyntaxText;
|
||||||
green::{Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeChildren, GreenToken, NodeCache, SyntaxKind},
|
pub use text_size::{TextLen, TextRange, TextSize};
|
||||||
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
|
}
|
||||||
};
|
|
||||||
pub use triomphe::Arc;
|
/// A tree builder for the construction of syntax trees.
|
||||||
|
///
|
||||||
|
/// Please refer to the documentation on [`GreenNodeBuilder`](build::GreenNodeBuilder) itself and the ["getting started"
|
||||||
|
/// section](../index.html#getting-started) from the top-level documentation for an introduction to how to build a
|
||||||
|
/// syntax tree.
|
||||||
|
pub mod build {
|
||||||
|
pub use crate::green::builder::{Checkpoint, GreenNodeBuilder, NodeCache};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A convenient collection of the most used parts of `cstree`.
|
||||||
|
pub mod prelude {
|
||||||
|
pub use crate::{
|
||||||
|
build::GreenNodeBuilder,
|
||||||
|
green::{GreenNode, GreenToken},
|
||||||
|
syntax::{SyntaxElement, SyntaxNode, SyntaxToken},
|
||||||
|
Language, RawSyntaxKind,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Types for syntax tree traversal / moving through trees.
|
||||||
|
pub mod traversal {
|
||||||
|
pub use crate::utility_types::{Direction, WalkEvent};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Utility types. It shouldn't be needed to reference these directly, but they are returned in several places in
|
||||||
|
/// `cstree` and may come in handy.
|
||||||
|
pub mod util {
|
||||||
|
pub use crate::utility_types::{NodeOrToken, TokenAtOffset};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Synchronization primitives.
|
||||||
|
pub mod sync {
|
||||||
|
/// An atomically reference counted shared pointer.
|
||||||
|
///
|
||||||
|
/// This is like [`Arc`](std::sync::Arc) in the standard library, but more efficient for how `cstree` stores
|
||||||
|
/// syntax trees internally. This Arc does not support weak reference counting.
|
||||||
|
pub use triomphe::Arc;
|
||||||
|
}
|
||||||
|
|
||||||
/// The `Language` trait is the bridge between the internal `cstree` representation and your
|
/// The `Language` trait is the bridge between the internal `cstree` representation and your
|
||||||
/// language's types.
|
/// language's types.
|
||||||
|
@ -97,13 +441,13 @@ pub use triomphe::Arc;
|
||||||
/// impl cstree::Language for Lang {
|
/// impl cstree::Language for Lang {
|
||||||
/// type Kind = SyntaxKind;
|
/// type Kind = SyntaxKind;
|
||||||
///
|
///
|
||||||
/// fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
/// assert!(raw.0 <= __LAST as u16);
|
/// assert!(raw.0 <= __LAST as u16);
|
||||||
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||||
/// }
|
/// }
|
||||||
///
|
///
|
||||||
/// fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
/// cstree::SyntaxKind(kind as u16)
|
/// cstree::RawSyntaxKind(kind as u16)
|
||||||
/// }
|
/// }
|
||||||
///
|
///
|
||||||
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
@ -115,29 +459,34 @@ pub use triomphe::Arc;
|
||||||
/// }
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`SyntaxNode`]: crate::syntax::SyntaxNode
|
||||||
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
|
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
|
||||||
/// A type that represents what items in your Language can be.
|
/// A type that represents what items in your Language can be.
|
||||||
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ...
|
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ...
|
||||||
type Kind: Sized + Clone + Copy + fmt::Debug;
|
type Kind: Sized + Clone + Copy + fmt::Debug;
|
||||||
|
|
||||||
/// Construct a semantic item kind from the compact representation.
|
/// Construct a semantic item kind from the compact representation.
|
||||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind;
|
||||||
|
|
||||||
/// Convert a semantic item kind into a more compact representation.
|
/// Convert a semantic item kind into a more compact representation.
|
||||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind;
|
||||||
|
|
||||||
/// Fixed text for a particular syntax kind.
|
/// Fixed text for a particular syntax kind.
|
||||||
///
|
|
||||||
/// Implement for kinds that will only ever represent the same text, such as punctuation (like a
|
/// Implement for kinds that will only ever represent the same text, such as punctuation (like a
|
||||||
/// semicolon), keywords (like `fn`), or operators (like `<=`).
|
/// semicolon), keywords (like `fn`), or operators (like `<=`).
|
||||||
|
///
|
||||||
|
/// Indicating tokens that have a `static_text` this way allows `cstree` to store them more efficiently, which makes
|
||||||
|
/// it faster to add them to a syntax tree and to look up their text. Since there can often be many occurrences
|
||||||
|
/// of these tokens inside a file, doing so will improve the performance of using `cstree`.
|
||||||
fn static_text(kind: Self::Kind) -> Option<&'static str>;
|
fn static_text(kind: Self::Kind) -> Option<&'static str>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[allow(unsafe_code, unused)]
|
#[allow(unsafe_code, unused)]
|
||||||
pub mod testing {
|
pub mod testing {
|
||||||
pub use crate::*;
|
pub use crate::prelude::*;
|
||||||
pub fn parse<L: Language, I>(_b: &mut super::GreenNodeBuilder<L, I>, _s: &str) {}
|
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u16)]
|
||||||
|
@ -160,13 +509,13 @@ pub mod testing {
|
||||||
impl Language for TestLang {
|
impl Language for TestLang {
|
||||||
type Kind = TestSyntaxKind;
|
type Kind = TestSyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
|
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
|
||||||
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
SyntaxKind(kind as u16)
|
RawSyntaxKind(kind as u16)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
//! Serialization and Deserialization for syntax trees.
|
//! Serialization and Deserialization for syntax trees.
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interning::{IntoResolver, Resolver},
|
build::GreenNodeBuilder,
|
||||||
GreenNodeBuilder, Language, NodeOrToken, ResolvedNode, SyntaxKind, SyntaxNode, WalkEvent,
|
interning::{Resolver, TokenKey},
|
||||||
|
syntax::{ResolvedNode, SyntaxNode},
|
||||||
|
traversal::WalkEvent,
|
||||||
|
util::NodeOrToken,
|
||||||
|
Language, RawSyntaxKind,
|
||||||
};
|
};
|
||||||
use serde::{
|
use serde::{
|
||||||
de::{Error, SeqAccess, Visitor},
|
de::{Error, SeqAccess, Visitor},
|
||||||
|
@ -77,8 +81,8 @@ enum Event<'text> {
|
||||||
/// The second parameter indicates if this node needs data.
|
/// The second parameter indicates if this node needs data.
|
||||||
/// If the boolean is true, the next element inside the data list
|
/// If the boolean is true, the next element inside the data list
|
||||||
/// must be attached to this node.
|
/// must be attached to this node.
|
||||||
EnterNode(SyntaxKind, bool),
|
EnterNode(RawSyntaxKind, bool),
|
||||||
Token(SyntaxKind, &'text str),
|
Token(RawSyntaxKind, &'text str),
|
||||||
LeaveNode,
|
LeaveNode,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,7 +101,7 @@ pub(crate) struct SerializeWithData<'node, 'resolver, L: Language, D: 'static, R
|
||||||
impl<L, D, R> Serialize for SerializeWithData<'_, '_, L, D, R>
|
impl<L, D, R> Serialize for SerializeWithData<'_, '_, L, D, R>
|
||||||
where
|
where
|
||||||
L: Language,
|
L: Language,
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
D: Serialize,
|
D: Serialize,
|
||||||
{
|
{
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
@ -112,7 +116,7 @@ where
|
||||||
impl<L, D, R> Serialize for SerializeWithResolver<'_, '_, L, D, R>
|
impl<L, D, R> Serialize for SerializeWithResolver<'_, '_, L, D, R>
|
||||||
where
|
where
|
||||||
L: Language,
|
L: Language,
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
where
|
where
|
||||||
|
@ -192,8 +196,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
let (tree, cache) = builder.finish();
|
let (tree, cache) = builder.finish();
|
||||||
let tree =
|
let tree = ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap());
|
||||||
ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap().into_resolver());
|
|
||||||
Ok((tree, data_indices))
|
Ok((tree, data_indices))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -236,7 +239,7 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Serialize for SyntaxKind {
|
impl Serialize for RawSyntaxKind {
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
where
|
where
|
||||||
S: serde::Serializer,
|
S: serde::Serializer,
|
||||||
|
@ -245,7 +248,7 @@ impl Serialize for SyntaxKind {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'de> Deserialize<'de> for SyntaxKind {
|
impl<'de> Deserialize<'de> for RawSyntaxKind {
|
||||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
where
|
where
|
||||||
D: serde::Deserializer<'de>,
|
D: serde::Deserializer<'de>,
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
use std::{fmt, sync::atomic::AtomicU32};
|
use std::{fmt, sync::atomic::AtomicU32};
|
||||||
|
|
||||||
use lasso::Resolver;
|
|
||||||
use text_size::{TextRange, TextSize};
|
use text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{green::GreenElementRef, Language, NodeOrToken, SyntaxKind, TokenAtOffset};
|
use crate::{
|
||||||
|
green::GreenElementRef,
|
||||||
|
interning::{Resolver, TokenKey},
|
||||||
|
util::{NodeOrToken, TokenAtOffset},
|
||||||
|
Language, RawSyntaxKind,
|
||||||
|
};
|
||||||
|
|
||||||
/// An element of the tree, can be either a node or a token.
|
/// An element of the tree, can be either a node or a token.
|
||||||
pub type SyntaxElement<L, D = ()> = NodeOrToken<SyntaxNode<L, D>, SyntaxToken<L, D>>;
|
pub type SyntaxElement<L, D = ()> = NodeOrToken<SyntaxNode<L, D>, SyntaxToken<L, D>>;
|
||||||
|
@ -27,7 +31,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElement.html#method.write_display).
|
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElement.html#method.write_display).
|
||||||
pub fn display<R>(&self, resolver: &R) -> String
|
pub fn display<R>(&self, resolver: &R) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.display(resolver),
|
NodeOrToken::Node(it) => it.display(resolver),
|
||||||
|
@ -38,7 +42,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
||||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
||||||
|
@ -53,7 +57,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElement.html#method.write_debug).
|
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElement.html#method.write_debug).
|
||||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
||||||
|
@ -66,7 +70,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
/// Otherwise, only this element's kind and range are written.
|
/// Otherwise, only this element's kind and range are written.
|
||||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
||||||
|
@ -105,7 +109,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
||||||
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElementRef.html#method.write_display).
|
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElementRef.html#method.write_display).
|
||||||
pub fn display<R>(&self, resolver: &R) -> String
|
pub fn display<R>(&self, resolver: &R) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.display(resolver),
|
NodeOrToken::Node(it) => it.display(resolver),
|
||||||
|
@ -116,7 +120,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
||||||
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
||||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
||||||
|
@ -131,7 +135,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
||||||
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElementRef.html#method.write_debug).
|
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElementRef.html#method.write_debug).
|
||||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
||||||
|
@ -144,7 +148,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
||||||
/// Otherwise, only this element's kind and range are written.
|
/// Otherwise, only this element's kind and range are written.
|
||||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
||||||
|
@ -162,8 +166,8 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
ref_count: *mut AtomicU32,
|
ref_count: *mut AtomicU32,
|
||||||
) -> SyntaxElement<L, D> {
|
) -> SyntaxElement<L, D> {
|
||||||
match element {
|
match element {
|
||||||
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index as u32, offset, ref_count).into(),
|
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index, offset, ref_count).into(),
|
||||||
NodeOrToken::Token(_) => SyntaxToken::new(parent, index as u32, offset).into(),
|
NodeOrToken::Token(_) => SyntaxToken::new(parent, index, offset).into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,7 +182,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this element.
|
/// The internal representation of the kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||||
|
@ -261,7 +265,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this element.
|
/// The internal representation of the kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||||
|
|
|
@ -4,7 +4,11 @@ use std::iter::FusedIterator;
|
||||||
|
|
||||||
use text_size::TextSize;
|
use text_size::TextSize;
|
||||||
|
|
||||||
use crate::{green::GreenElementRef, GreenNodeChildren, Language, SyntaxElementRef, SyntaxNode};
|
use crate::{
|
||||||
|
green::{GreenElementRef, GreenNodeChildren},
|
||||||
|
syntax::{SyntaxElementRef, SyntaxNode},
|
||||||
|
Language,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct Iter<'n> {
|
struct Iter<'n> {
|
||||||
|
|
|
@ -36,6 +36,7 @@ pub use text::SyntaxText;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::*;
|
||||||
use crate::testing::*;
|
use crate::testing::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -2,9 +2,12 @@ use super::*;
|
||||||
#[cfg(feature = "serialize")]
|
#[cfg(feature = "serialize")]
|
||||||
use crate::serde_impls::{SerializeWithData, SerializeWithResolver};
|
use crate::serde_impls::{SerializeWithData, SerializeWithResolver};
|
||||||
use crate::{
|
use crate::{
|
||||||
green::{GreenElementRef, SyntaxKind},
|
green::{GreenElementRef, GreenNode},
|
||||||
interning::Resolver,
|
interning::{Resolver, TokenKey},
|
||||||
*,
|
text::*,
|
||||||
|
traversal::*,
|
||||||
|
util::*,
|
||||||
|
Language, RawSyntaxKind,
|
||||||
};
|
};
|
||||||
use parking_lot::RwLock;
|
use parking_lot::RwLock;
|
||||||
use std::{
|
use std::{
|
||||||
|
@ -39,7 +42,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
/// Otherwise, only this node's kind and range are written.
|
/// Otherwise, only this node's kind and range are written.
|
||||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
if recursive {
|
if recursive {
|
||||||
let mut level = 0;
|
let mut level = 0;
|
||||||
|
@ -71,7 +74,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
// NOTE: `fmt::Write` methods on `String` never fail
|
// NOTE: `fmt::Write` methods on `String` never fail
|
||||||
let mut res = String::new();
|
let mut res = String::new();
|
||||||
|
@ -82,7 +85,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
/// Writes this node's [`Display`](fmt::Display) representation into the given `target`.
|
/// Writes this node's [`Display`](fmt::Display) representation into the given `target`.
|
||||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
self.preorder_with_tokens()
|
self.preorder_with_tokens()
|
||||||
.filter_map(|event| match event {
|
.filter_map(|event| match event {
|
||||||
|
@ -98,7 +101,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn display<R>(&self, resolver: &R) -> String
|
pub fn display<R>(&self, resolver: &R) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
// NOTE: `fmt::Write` methods on `String` never fail
|
// NOTE: `fmt::Write` methods on `String` never fail
|
||||||
let mut res = String::new();
|
let mut res = String::new();
|
||||||
|
@ -107,21 +110,22 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If there is a resolver associated with this tree, returns it.
|
/// If there is a resolver associated with this tree, returns it.
|
||||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
|
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
|
||||||
match &self.root().data().kind {
|
match &self.root().data().kind {
|
||||||
Kind::Root(_, resolver) => resolver.as_ref(),
|
Kind::Root(_, resolver) => resolver.as_ref(),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turns this node into a [`ResolvedNode`], but only if there is a resolver associated with this tree.
|
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode), but only if there is a resolver associated
|
||||||
|
/// with this tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn try_resolved(&self) -> Option<&ResolvedNode<L, D>> {
|
pub fn try_resolved(&self) -> Option<&ResolvedNode<L, D>> {
|
||||||
// safety: we only coerce if `resolver` exists
|
// safety: we only coerce if `resolver` exists
|
||||||
self.resolver().map(|_| unsafe { ResolvedNode::coerce_ref(self) })
|
self.resolver().map(|_| unsafe { ResolvedNode::coerce_ref(self) })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turns this node into a [`ResolvedNode`].
|
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode).
|
||||||
/// # Panics
|
/// # Panics
|
||||||
/// If there is no resolver associated with this tree.
|
/// If there is no resolver associated with this tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -233,7 +237,7 @@ impl<L: Language, D> Hash for SyntaxNode<L, D> {
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Kind<L: Language, D: 'static> {
|
enum Kind<L: Language, D: 'static> {
|
||||||
Root(GreenNode, Option<StdArc<dyn Resolver>>),
|
Root(GreenNode, Option<StdArc<dyn Resolver<TokenKey>>>),
|
||||||
Child {
|
Child {
|
||||||
parent: SyntaxNode<L, D>,
|
parent: SyntaxNode<L, D>,
|
||||||
index: u32,
|
index: u32,
|
||||||
|
@ -300,7 +304,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
Self { data }
|
Self { data }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver>>) -> Self {
|
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver<TokenKey>>>) -> Self {
|
||||||
let ref_count = Box::new(AtomicU32::new(1));
|
let ref_count = Box::new(AtomicU32::new(1));
|
||||||
let n_children = green.children().count();
|
let n_children = green.children().count();
|
||||||
let data = NodeData::new(
|
let data = NodeData::new(
|
||||||
|
@ -328,6 +332,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
/// # Example
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
|
/// use cstree::syntax::ResolvedNode;
|
||||||
|
///
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||||
/// builder.start_node(Root);
|
/// builder.start_node(Root);
|
||||||
/// builder.token(Identifier, "content");
|
/// builder.token(Identifier, "content");
|
||||||
|
@ -342,8 +348,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
/// assert_eq!(root.text(), "content");
|
/// assert_eq!(root.text(), "content");
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> ResolvedNode<L, D> {
|
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> ResolvedNode<L, D> {
|
||||||
let ptr: StdArc<dyn Resolver> = StdArc::new(resolver);
|
let ptr: StdArc<dyn Resolver<TokenKey>> = StdArc::new(resolver);
|
||||||
ResolvedNode {
|
ResolvedNode {
|
||||||
syntax: SyntaxNode::make_new_root(green, Some(ptr)),
|
syntax: SyntaxNode::make_new_root(green, Some(ptr)),
|
||||||
}
|
}
|
||||||
|
@ -517,7 +523,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this node.
|
/// The internal representation of the kind of this node.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
self.green().kind()
|
self.green().kind()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -543,7 +549,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn resolve_text<'n, 'i, I>(&'n self, resolver: &'i I) -> SyntaxText<'n, 'i, I, L, D>
|
pub fn resolve_text<'n, 'i, I>(&'n self, resolver: &'i I) -> SyntaxText<'n, 'i, I, L, D>
|
||||||
where
|
where
|
||||||
I: Resolver + ?Sized,
|
I: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
SyntaxText::new(self, resolver)
|
SyntaxText::new(self, resolver)
|
||||||
}
|
}
|
||||||
|
@ -911,7 +917,7 @@ where
|
||||||
/// including the data and by using an external resolver.
|
/// including the data and by using an external resolver.
|
||||||
pub fn as_serialize_with_data_with_resolver<'node>(
|
pub fn as_serialize_with_data_with_resolver<'node>(
|
||||||
&'node self,
|
&'node self,
|
||||||
resolver: &'node impl Resolver,
|
resolver: &'node impl Resolver<TokenKey>,
|
||||||
) -> impl serde::Serialize + 'node
|
) -> impl serde::Serialize + 'node
|
||||||
where
|
where
|
||||||
D: serde::Serialize,
|
D: serde::Serialize,
|
||||||
|
@ -923,7 +929,7 @@ where
|
||||||
/// which uses the given resolver instead of the resolver inside the tree.
|
/// which uses the given resolver instead of the resolver inside the tree.
|
||||||
pub fn as_serialize_with_resolver<'node>(
|
pub fn as_serialize_with_resolver<'node>(
|
||||||
&'node self,
|
&'node self,
|
||||||
resolver: &'node impl Resolver,
|
resolver: &'node impl Resolver<TokenKey>,
|
||||||
) -> impl serde::Serialize + 'node {
|
) -> impl serde::Serialize + 'node {
|
||||||
SerializeWithResolver { node: self, resolver }
|
SerializeWithResolver { node: self, resolver }
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,12 +9,15 @@ use std::{
|
||||||
sync::Arc as StdArc,
|
sync::Arc as StdArc,
|
||||||
};
|
};
|
||||||
|
|
||||||
use lasso::Resolver;
|
|
||||||
use text_size::{TextRange, TextSize};
|
use text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
Direction, GreenNode, Language, NodeOrToken, SyntaxElementRef, SyntaxKind, SyntaxNode, SyntaxText, SyntaxToken,
|
green::GreenNode,
|
||||||
TokenAtOffset, WalkEvent,
|
interning::{Resolver, TokenKey},
|
||||||
|
syntax::*,
|
||||||
|
traversal::*,
|
||||||
|
util::*,
|
||||||
|
Language, RawSyntaxKind,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Syntax tree node that is guaranteed to belong to a tree that contains an associated
|
/// Syntax tree node that is guaranteed to belong to a tree that contains an associated
|
||||||
|
@ -109,7 +112,7 @@ impl<L: Language, D> DerefMut for ResolvedToken<L, D> {
|
||||||
/// An element of the tree that is guaranteed to belong to a tree that contains an associated
|
/// An element of the tree that is guaranteed to belong to a tree that contains an associated
|
||||||
/// [`Resolver`](lasso::Resolver), can be either a node or a token.
|
/// [`Resolver`](lasso::Resolver), can be either a node or a token.
|
||||||
/// # See also
|
/// # See also
|
||||||
/// [`SyntaxElement`](crate::SyntaxElement)
|
/// [`SyntaxElement`](crate::syntax::SyntaxElement)
|
||||||
pub type ResolvedElement<L, D = ()> = NodeOrToken<ResolvedNode<L, D>, ResolvedToken<L, D>>;
|
pub type ResolvedElement<L, D = ()> = NodeOrToken<ResolvedNode<L, D>, ResolvedToken<L, D>>;
|
||||||
|
|
||||||
impl<L: Language, D> From<ResolvedNode<L, D>> for ResolvedElement<L, D> {
|
impl<L: Language, D> From<ResolvedNode<L, D>> for ResolvedElement<L, D> {
|
||||||
|
@ -126,7 +129,7 @@ impl<L: Language, D> From<ResolvedToken<L, D>> for ResolvedElement<L, D> {
|
||||||
|
|
||||||
impl<L: Language, D> ResolvedElement<L, D> {
|
impl<L: Language, D> ResolvedElement<L, D> {
|
||||||
#[allow(missing_docs)]
|
#[allow(missing_docs)]
|
||||||
pub fn display(&self, resolver: &impl Resolver) -> String {
|
pub fn display(&self, resolver: &impl Resolver<TokenKey>) -> String {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.display(resolver),
|
NodeOrToken::Node(it) => it.display(resolver),
|
||||||
NodeOrToken::Token(it) => it.display(resolver),
|
NodeOrToken::Token(it) => it.display(resolver),
|
||||||
|
@ -177,7 +180,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
|
||||||
/// source text covered by this node, i.e. the combined text of all token leafs of the subtree
|
/// source text covered by this node, i.e. the combined text of all token leafs of the subtree
|
||||||
/// originating in this node.
|
/// originating in this node.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver, L, D> {
|
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver<TokenKey>, L, D> {
|
||||||
SyntaxText::new(self, &**self.resolver())
|
SyntaxText::new(self, &**self.resolver())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -266,13 +269,13 @@ macro_rules! forward_node {
|
||||||
|
|
||||||
impl<L: Language, D> ResolvedNode<L, D> {
|
impl<L: Language, D> ResolvedNode<L, D> {
|
||||||
/// Returns the [`Resolver`] associated with this tree.
|
/// Returns the [`Resolver`] associated with this tree.
|
||||||
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
|
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
|
||||||
self.syntax.resolver().unwrap()
|
self.syntax.resolver().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// See [`SyntaxNode::new_root_with_resolver`].
|
/// See [`SyntaxNode::new_root_with_resolver`].
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> Self {
|
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> Self {
|
||||||
SyntaxNode::new_root_with_resolver(green, resolver)
|
SyntaxNode::new_root_with_resolver(green, resolver)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -498,7 +501,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
|
||||||
|
|
||||||
impl<L: Language, D> ResolvedToken<L, D> {
|
impl<L: Language, D> ResolvedToken<L, D> {
|
||||||
/// Returns the [`Resolver`] associated with this tree.
|
/// Returns the [`Resolver`] associated with this tree.
|
||||||
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
|
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
|
||||||
self.syntax.resolver().unwrap()
|
self.syntax.resolver().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -575,7 +578,7 @@ impl<L: Language, D> ResolvedElement<L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this element.
|
/// The internal representation of the kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||||
|
@ -658,7 +661,7 @@ impl<'a, L: Language, D> ResolvedElementRef<'a, L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this element.
|
/// The internal representation of the kind of this element.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
match self {
|
match self {
|
||||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||||
|
|
|
@ -2,7 +2,12 @@
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, TextSize};
|
use crate::{
|
||||||
|
interning::{Resolver, TokenKey},
|
||||||
|
syntax::{SyntaxNode, SyntaxToken},
|
||||||
|
text::{TextRange, TextSize},
|
||||||
|
Language,
|
||||||
|
};
|
||||||
|
|
||||||
/// An efficient representation of the text that is covered by a [`SyntaxNode`], i.e. the combined
|
/// An efficient representation of the text that is covered by a [`SyntaxNode`], i.e. the combined
|
||||||
/// source text of all tokens that are descendants of the node.
|
/// source text of all tokens that are descendants of the node.
|
||||||
|
@ -14,7 +19,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
||||||
/// # Example
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
/// # use cstree::interning::IntoResolver;
|
/// # use cstree::syntax::ResolvedNode;
|
||||||
/// #
|
/// #
|
||||||
/// fn parse_float_literal(s: &str) -> ResolvedNode<MyLanguage> {
|
/// fn parse_float_literal(s: &str) -> ResolvedNode<MyLanguage> {
|
||||||
/// // parsing...
|
/// // parsing...
|
||||||
|
@ -23,7 +28,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
||||||
/// # builder.token(Float, s);
|
/// # builder.token(Float, s);
|
||||||
/// # builder.finish_node();
|
/// # builder.finish_node();
|
||||||
/// # let (root, cache) = builder.finish();
|
/// # let (root, cache) = builder.finish();
|
||||||
/// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
|
/// # let resolver = cache.unwrap().into_interner().unwrap();
|
||||||
/// # SyntaxNode::new_root_with_resolver(root, resolver)
|
/// # SyntaxNode::new_root_with_resolver(root, resolver)
|
||||||
/// }
|
/// }
|
||||||
/// let float_node = parse_float_literal("2.748E2");
|
/// let float_node = parse_float_literal("2.748E2");
|
||||||
|
@ -42,7 +47,7 @@ pub struct SyntaxText<'n, 'i, I: ?Sized, L: Language, D: 'static = ()> {
|
||||||
resolver: &'i I,
|
resolver: &'i I,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'n, 'i, I: Resolver + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
|
impl<'n, 'i, I: Resolver<TokenKey> + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
|
||||||
pub(crate) fn new(node: &'n SyntaxNode<L, D>, resolver: &'i I) -> Self {
|
pub(crate) fn new(node: &'n SyntaxNode<L, D>, resolver: &'i I) -> Self {
|
||||||
let range = node.text_range();
|
let range = node.text_range();
|
||||||
SyntaxText { node, range, resolver }
|
SyntaxText { node, range, resolver }
|
||||||
|
@ -203,25 +208,25 @@ fn found<T>(res: Result<(), T>) -> Option<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
fmt::Debug::fmt(&self.to_string(), f)
|
fmt::Debug::fmt(&self.to_string(), f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
|
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
|
||||||
fn from(text: SyntaxText<'_, '_, I, L, D>) -> String {
|
fn from(text: SyntaxText<'_, '_, I, L, D>) -> String {
|
||||||
text.to_string()
|
text.to_string()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
|
||||||
fn eq(&self, mut rhs: &str) -> bool {
|
fn eq(&self, mut rhs: &str) -> bool {
|
||||||
self.try_for_each_chunk(|chunk| {
|
self.try_for_each_chunk(|chunk| {
|
||||||
if !rhs.starts_with(chunk) {
|
if !rhs.starts_with(chunk) {
|
||||||
|
@ -235,19 +240,19 @@ impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
|
||||||
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
||||||
rhs == self
|
rhs == self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
|
||||||
fn eq(&self, rhs: &&str) -> bool {
|
fn eq(&self, rhs: &&str) -> bool {
|
||||||
self == *rhs
|
self == *rhs
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
|
||||||
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
||||||
rhs == self
|
rhs == self
|
||||||
}
|
}
|
||||||
|
@ -258,8 +263,8 @@ impl<'n1, 'i1, 'n2, 'i2, I1, I2, L1, L2, D1, D2> PartialEq<SyntaxText<'n2, 'i2,
|
||||||
where
|
where
|
||||||
L1: Language,
|
L1: Language,
|
||||||
L2: Language,
|
L2: Language,
|
||||||
I1: Resolver + ?Sized,
|
I1: Resolver<TokenKey> + ?Sized,
|
||||||
I2: Resolver + ?Sized,
|
I2: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool {
|
fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool {
|
||||||
if self.range.len() != other.range.len() {
|
if self.range.len() != other.range.len() {
|
||||||
|
@ -282,8 +287,8 @@ fn zip_texts<'it1, 'it2, It1, It2, I1, I2, L1, L2, D1, D2>(
|
||||||
where
|
where
|
||||||
It1: Iterator<Item = (&'it1 SyntaxToken<L1, D1>, TextRange)>,
|
It1: Iterator<Item = (&'it1 SyntaxToken<L1, D1>, TextRange)>,
|
||||||
It2: Iterator<Item = (&'it2 SyntaxToken<L2, D2>, TextRange)>,
|
It2: Iterator<Item = (&'it2 SyntaxToken<L2, D2>, TextRange)>,
|
||||||
I1: Resolver + ?Sized,
|
I1: Resolver<TokenKey> + ?Sized,
|
||||||
I2: Resolver + ?Sized,
|
I2: Resolver<TokenKey> + ?Sized,
|
||||||
D1: 'static,
|
D1: 'static,
|
||||||
D2: 'static,
|
D2: 'static,
|
||||||
L1: Language + 'it1,
|
L1: Language + 'it1,
|
||||||
|
@ -309,12 +314,12 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Resolver + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
|
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
|
||||||
|
|
||||||
mod private {
|
mod private {
|
||||||
use std::ops;
|
use std::ops;
|
||||||
|
|
||||||
use crate::{TextRange, TextSize};
|
use crate::text::{TextRange, TextSize};
|
||||||
|
|
||||||
pub trait SyntaxTextRange {
|
pub trait SyntaxTextRange {
|
||||||
fn start(&self) -> Option<TextSize>;
|
fn start(&self) -> Option<TextSize>;
|
||||||
|
@ -374,27 +379,27 @@ mod private {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::{green::SyntaxKind, GreenNodeBuilder};
|
use crate::{build::GreenNodeBuilder, RawSyntaxKind};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||||
pub enum TestLang {}
|
pub enum TestLang {}
|
||||||
impl Language for TestLang {
|
impl Language for TestLang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = RawSyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
raw
|
raw
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
kind
|
kind
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
if kind == SyntaxKind(1) {
|
if kind == RawSyntaxKind(1) {
|
||||||
Some("{")
|
Some("{")
|
||||||
} else if kind == SyntaxKind(2) {
|
} else if kind == RawSyntaxKind(2) {
|
||||||
Some("}")
|
Some("}")
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -402,16 +407,16 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver) {
|
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver<TokenKey>) {
|
||||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||||
builder.start_node(SyntaxKind(62));
|
builder.start_node(RawSyntaxKind(62));
|
||||||
for &chunk in chunks.iter() {
|
for &chunk in chunks.iter() {
|
||||||
let kind = match chunk {
|
let kind = match chunk {
|
||||||
"{" => 1,
|
"{" => 1,
|
||||||
"}" => 2,
|
"}" => 2,
|
||||||
_ => 3,
|
_ => 3,
|
||||||
};
|
};
|
||||||
builder.token(SyntaxKind(kind), chunk);
|
builder.token(RawSyntaxKind(kind), chunk);
|
||||||
}
|
}
|
||||||
builder.finish_node();
|
builder.finish_node();
|
||||||
let (node, cache) = builder.finish();
|
let (node, cache) = builder.finish();
|
||||||
|
|
|
@ -5,11 +5,15 @@ use std::{
|
||||||
sync::Arc as StdArc,
|
sync::Arc as StdArc,
|
||||||
};
|
};
|
||||||
|
|
||||||
use lasso::Resolver;
|
|
||||||
use text_size::{TextRange, TextSize};
|
use text_size::{TextRange, TextSize};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{interning::Key, Direction, GreenNode, GreenToken, Language, SyntaxKind};
|
use crate::{
|
||||||
|
green::{GreenNode, GreenToken},
|
||||||
|
interning::{Resolver, TokenKey},
|
||||||
|
traversal::Direction,
|
||||||
|
Language, RawSyntaxKind,
|
||||||
|
};
|
||||||
|
|
||||||
/// Syntax tree token.
|
/// Syntax tree token.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -49,7 +53,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
/// Writes this token's [`Debug`](fmt::Debug) representation into the given `target`.
|
/// Writes this token's [`Debug`](fmt::Debug) representation into the given `target`.
|
||||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
write!(target, "{:?}@{:?}", self.kind(), self.text_range())?;
|
write!(target, "{:?}@{:?}", self.kind(), self.text_range())?;
|
||||||
let text = self.resolve_text(resolver);
|
let text = self.resolve_text(resolver);
|
||||||
|
@ -72,7 +76,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn debug<R>(&self, resolver: &R) -> String
|
pub fn debug<R>(&self, resolver: &R) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
// NOTE: `fmt::Write` methods on `String` never fail
|
// NOTE: `fmt::Write` methods on `String` never fail
|
||||||
let mut res = String::new();
|
let mut res = String::new();
|
||||||
|
@ -84,7 +88,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
write!(target, "{}", self.resolve_text(resolver))
|
write!(target, "{}", self.resolve_text(resolver))
|
||||||
}
|
}
|
||||||
|
@ -95,25 +99,26 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn display<R>(&self, resolver: &R) -> String
|
pub fn display<R>(&self, resolver: &R) -> String
|
||||||
where
|
where
|
||||||
R: Resolver + ?Sized,
|
R: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
self.resolve_text(resolver).to_string()
|
self.resolve_text(resolver).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If there is a resolver associated with this tree, returns it.
|
/// If there is a resolver associated with this tree, returns it.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
|
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
|
||||||
self.parent.resolver()
|
self.parent.resolver()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turns this token into a [`ResolvedToken`], but only if there is a resolver associated with this tree.
|
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken), but only if there is a resolver
|
||||||
|
/// associated with this tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn try_resolved(&self) -> Option<&ResolvedToken<L, D>> {
|
pub fn try_resolved(&self) -> Option<&ResolvedToken<L, D>> {
|
||||||
// safety: we only coerce if `resolver` exists
|
// safety: we only coerce if `resolver` exists
|
||||||
self.resolver().map(|_| unsafe { ResolvedToken::coerce_ref(self) })
|
self.resolver().map(|_| unsafe { ResolvedToken::coerce_ref(self) })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turns this token into a [`ResolvedToken`].
|
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken).
|
||||||
/// # Panics
|
/// # Panics
|
||||||
/// If there is no resolver associated with this tree.
|
/// If there is no resolver associated with this tree.
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -153,7 +158,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
|
|
||||||
/// The internal representation of the kind of this token.
|
/// The internal representation of the kind of this token.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||||
self.green().kind()
|
self.green().kind()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,7 +181,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str
|
pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str
|
||||||
where
|
where
|
||||||
I: Resolver + ?Sized,
|
I: Resolver<TokenKey> + ?Sized,
|
||||||
{
|
{
|
||||||
// one of the two must be present upon construction
|
// one of the two must be present upon construction
|
||||||
self.static_text().or_else(|| self.green().text(resolver)).unwrap()
|
self.static_text().or_else(|| self.green().text(resolver)).unwrap()
|
||||||
|
@ -191,6 +196,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
|
/// # use cstree::build::*;
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||||
/// # builder.start_node(Root);
|
/// # builder.start_node(Root);
|
||||||
/// # builder.token(Identifier, "x");
|
/// # builder.token(Identifier, "x");
|
||||||
|
@ -278,18 +284,18 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
/// implementation by re-using the interner in both.
|
/// implementation by re-using the interner in both.
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::testing::*;
|
/// # use cstree::testing::*;
|
||||||
/// use cstree::interning::{new_interner, Hasher, Key, Rodeo};
|
/// use cstree::interning::{new_interner, TokenInterner, TokenKey};
|
||||||
/// struct TypeTable {
|
/// struct TypeTable {
|
||||||
/// // ...
|
/// // ...
|
||||||
/// }
|
/// }
|
||||||
/// impl TypeTable {
|
/// impl TypeTable {
|
||||||
/// fn type_of(&self, ident: Key) -> &str {
|
/// fn type_of(&self, ident: TokenKey) -> &str {
|
||||||
/// // ...
|
/// // ...
|
||||||
/// # ""
|
/// # ""
|
||||||
/// }
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// # struct State {
|
/// # struct State {
|
||||||
/// # interner: Rodeo,
|
/// # interner: TokenInterner,
|
||||||
/// # type_table: TypeTable,
|
/// # type_table: TypeTable,
|
||||||
/// # }
|
/// # }
|
||||||
/// let interner = new_interner();
|
/// let interner = new_interner();
|
||||||
|
@ -297,7 +303,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
/// interner,
|
/// interner,
|
||||||
/// type_table: TypeTable{ /* stuff */},
|
/// type_table: TypeTable{ /* stuff */},
|
||||||
/// };
|
/// };
|
||||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> =
|
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||||
/// GreenNodeBuilder::with_interner(&mut state.interner);
|
/// GreenNodeBuilder::with_interner(&mut state.interner);
|
||||||
/// # let input = "";
|
/// # let input = "";
|
||||||
/// # builder.start_node(Root);
|
/// # builder.start_node(Root);
|
||||||
|
@ -315,7 +321,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
||||||
/// let typ = type_table.type_of(ident.text_key().unwrap());
|
/// let typ = type_table.type_of(ident.text_key().unwrap());
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn text_key(&self) -> Option<Key> {
|
pub fn text_key(&self) -> Option<TokenKey> {
|
||||||
self.green().text_key()
|
self.green().text_key()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -109,7 +109,7 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
|
||||||
fn deref(&self) -> &T {
|
fn deref(&self) -> &T {
|
||||||
match self {
|
match self {
|
||||||
MaybeOwned::Owned(it) => it,
|
MaybeOwned::Owned(it) => it,
|
||||||
MaybeOwned::Borrowed(it) => *it,
|
MaybeOwned::Borrowed(it) => it,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -118,7 +118,7 @@ impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
|
||||||
fn deref_mut(&mut self) -> &mut T {
|
fn deref_mut(&mut self) -> &mut T {
|
||||||
match self {
|
match self {
|
||||||
MaybeOwned::Owned(it) => it,
|
MaybeOwned::Owned(it) => it,
|
||||||
MaybeOwned::Borrowed(it) => *it,
|
MaybeOwned::Borrowed(it) => it,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
use super::*;
|
use super::*;
|
||||||
use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind, TextRange};
|
use cstree::{
|
||||||
use lasso::{Resolver, Rodeo};
|
build::{GreenNodeBuilder, NodeCache},
|
||||||
|
interning::{new_interner, Resolver},
|
||||||
|
text::TextRange,
|
||||||
|
RawSyntaxKind,
|
||||||
|
};
|
||||||
|
|
||||||
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<D>, impl Resolver) {
|
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<D>, impl Resolver) {
|
||||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||||
|
@ -31,20 +35,20 @@ fn tree_with_eq_tokens() -> Element<'static> {
|
||||||
fn create() {
|
fn create() {
|
||||||
let tree = two_level_tree();
|
let tree = two_level_tree();
|
||||||
let (tree, resolver) = build_tree::<()>(&tree);
|
let (tree, resolver) = build_tree::<()>(&tree);
|
||||||
assert_eq!(tree.syntax_kind(), SyntaxKind(0));
|
assert_eq!(tree.syntax_kind(), RawSyntaxKind(0));
|
||||||
assert_eq!(tree.kind(), SyntaxKind(0));
|
assert_eq!(tree.kind(), RawSyntaxKind(0));
|
||||||
{
|
{
|
||||||
let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap();
|
let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap();
|
||||||
let leaf1_0 = leaf1_0.into_token().unwrap();
|
let leaf1_0 = leaf1_0.into_token().unwrap();
|
||||||
assert_eq!(leaf1_0.syntax_kind(), SyntaxKind(5));
|
assert_eq!(leaf1_0.syntax_kind(), RawSyntaxKind(5));
|
||||||
assert_eq!(leaf1_0.kind(), SyntaxKind(5));
|
assert_eq!(leaf1_0.kind(), RawSyntaxKind(5));
|
||||||
assert_eq!(leaf1_0.resolve_text(&resolver), "1.0");
|
assert_eq!(leaf1_0.resolve_text(&resolver), "1.0");
|
||||||
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let node2 = tree.children().nth(2).unwrap();
|
let node2 = tree.children().nth(2).unwrap();
|
||||||
assert_eq!(node2.syntax_kind(), SyntaxKind(6));
|
assert_eq!(node2.syntax_kind(), RawSyntaxKind(6));
|
||||||
assert_eq!(node2.kind(), SyntaxKind(6));
|
assert_eq!(node2.kind(), RawSyntaxKind(6));
|
||||||
assert_eq!(node2.children_with_tokens().count(), 3);
|
assert_eq!(node2.children_with_tokens().count(), 3);
|
||||||
assert_eq!(node2.resolve_text(&resolver), "2.02.12.2");
|
assert_eq!(node2.resolve_text(&resolver), "2.02.12.2");
|
||||||
}
|
}
|
||||||
|
@ -54,7 +58,7 @@ fn create() {
|
||||||
fn token_text_eq() {
|
fn token_text_eq() {
|
||||||
let tree = tree_with_eq_tokens();
|
let tree = tree_with_eq_tokens();
|
||||||
let (tree, _) = build_tree::<()>(&tree);
|
let (tree, _) = build_tree::<()>(&tree);
|
||||||
assert_eq!(tree.kind(), SyntaxKind(0));
|
assert_eq!(tree.kind(), RawSyntaxKind(0));
|
||||||
|
|
||||||
let leaf0_0 = tree.children().next().unwrap().children_with_tokens().next().unwrap();
|
let leaf0_0 = tree.children().next().unwrap().children_with_tokens().next().unwrap();
|
||||||
let leaf0_0 = leaf0_0.into_token().unwrap();
|
let leaf0_0 = leaf0_0.into_token().unwrap();
|
||||||
|
@ -115,7 +119,7 @@ fn data() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn with_interner() {
|
fn with_interner() {
|
||||||
let mut interner = Rodeo::new();
|
let mut interner = new_interner();
|
||||||
let mut cache = NodeCache::with_interner(&mut interner);
|
let mut cache = NodeCache::with_interner(&mut interner);
|
||||||
let tree = two_level_tree();
|
let tree = two_level_tree();
|
||||||
let tree = build_tree_with_cache(&tree, &mut cache);
|
let tree = build_tree_with_cache(&tree, &mut cache);
|
||||||
|
@ -135,7 +139,7 @@ fn with_interner() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn inline_resolver() {
|
fn inline_resolver() {
|
||||||
let mut interner = Rodeo::new();
|
let mut interner = new_interner();
|
||||||
let mut cache = NodeCache::with_interner(&mut interner);
|
let mut cache = NodeCache::with_interner(&mut interner);
|
||||||
let tree = two_level_tree();
|
let tree = two_level_tree();
|
||||||
let tree = build_tree_with_cache(&tree, &mut cache);
|
let tree = build_tree_with_cache(&tree, &mut cache);
|
||||||
|
@ -146,7 +150,7 @@ fn inline_resolver() {
|
||||||
assert_eq!(leaf1_0.text(), "1.0");
|
assert_eq!(leaf1_0.text(), "1.0");
|
||||||
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
||||||
assert_eq!(format!("{}", leaf1_0), leaf1_0.text());
|
assert_eq!(format!("{}", leaf1_0), leaf1_0.text());
|
||||||
assert_eq!(format!("{:?}", leaf1_0), "SyntaxKind(5)@6..9 \"1.0\"");
|
assert_eq!(format!("{:?}", leaf1_0), "RawSyntaxKind(5)@6..9 \"1.0\"");
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
let node2 = tree.children().nth(2).unwrap();
|
let node2 = tree.children().nth(2).unwrap();
|
||||||
|
@ -154,13 +158,13 @@ fn inline_resolver() {
|
||||||
let resolver = node2.resolver();
|
let resolver = node2.resolver();
|
||||||
assert_eq!(node2.resolve_text(resolver.as_ref()), node2.text());
|
assert_eq!(node2.resolve_text(resolver.as_ref()), node2.text());
|
||||||
assert_eq!(format!("{}", node2).as_str(), node2.text());
|
assert_eq!(format!("{}", node2).as_str(), node2.text());
|
||||||
assert_eq!(format!("{:?}", node2), "SyntaxKind(6)@9..18");
|
assert_eq!(format!("{:?}", node2), "RawSyntaxKind(6)@9..18");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
format!("{:#?}", node2),
|
format!("{:#?}", node2),
|
||||||
r#"SyntaxKind(6)@9..18
|
r#"RawSyntaxKind(6)@9..18
|
||||||
SyntaxKind(7)@9..12 "2.0"
|
RawSyntaxKind(7)@9..12 "2.0"
|
||||||
SyntaxKind(8)@12..15 "2.1"
|
RawSyntaxKind(8)@12..15 "2.1"
|
||||||
SyntaxKind(9)@15..18 "2.2"
|
RawSyntaxKind(9)@15..18 "2.2"
|
||||||
"#
|
"#
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -175,7 +179,7 @@ fn assert_debug_display() {
|
||||||
f::<ResolvedToken>();
|
f::<ResolvedToken>();
|
||||||
f::<ResolvedElement>();
|
f::<ResolvedElement>();
|
||||||
f::<ResolvedElementRef<'static>>();
|
f::<ResolvedElementRef<'static>>();
|
||||||
f::<cstree::NodeOrToken<String, u128>>();
|
f::<cstree::util::NodeOrToken<String, u128>>();
|
||||||
|
|
||||||
fn dbg<T: fmt::Debug>() {}
|
fn dbg<T: fmt::Debug>() {}
|
||||||
dbg::<GreenNodeBuilder<'static, 'static, TestLang>>();
|
dbg::<GreenNodeBuilder<'static, 'static, TestLang>>();
|
||||||
|
|
|
@ -4,18 +4,22 @@ mod sendsync;
|
||||||
#[cfg(feature = "serialize")]
|
#[cfg(feature = "serialize")]
|
||||||
mod serde;
|
mod serde;
|
||||||
|
|
||||||
use cstree::{GreenNode, GreenNodeBuilder, Language, NodeCache, SyntaxKind};
|
use cstree::{
|
||||||
use lasso::Interner;
|
build::{GreenNodeBuilder, NodeCache},
|
||||||
|
green::GreenNode,
|
||||||
|
interning::Interner,
|
||||||
|
Language, RawSyntaxKind,
|
||||||
|
};
|
||||||
|
|
||||||
pub type SyntaxNode<D = ()> = cstree::SyntaxNode<TestLang, D>;
|
pub type SyntaxNode<D = ()> = cstree::syntax::SyntaxNode<TestLang, D>;
|
||||||
pub type SyntaxToken<D = ()> = cstree::SyntaxToken<TestLang, D>;
|
pub type SyntaxToken<D = ()> = cstree::syntax::SyntaxToken<TestLang, D>;
|
||||||
pub type SyntaxElement<D = ()> = cstree::SyntaxElement<TestLang, D>;
|
pub type SyntaxElement<D = ()> = cstree::syntax::SyntaxElement<TestLang, D>;
|
||||||
pub type SyntaxElementRef<'a, D = ()> = cstree::SyntaxElementRef<'a, TestLang, D>;
|
pub type SyntaxElementRef<'a, D = ()> = cstree::syntax::SyntaxElementRef<'a, TestLang, D>;
|
||||||
|
|
||||||
pub type ResolvedNode<D = ()> = cstree::ResolvedNode<TestLang, D>;
|
pub type ResolvedNode<D = ()> = cstree::syntax::ResolvedNode<TestLang, D>;
|
||||||
pub type ResolvedToken<D = ()> = cstree::ResolvedToken<TestLang, D>;
|
pub type ResolvedToken<D = ()> = cstree::syntax::ResolvedToken<TestLang, D>;
|
||||||
pub type ResolvedElement<D = ()> = cstree::ResolvedElement<TestLang, D>;
|
pub type ResolvedElement<D = ()> = cstree::syntax::ResolvedElement<TestLang, D>;
|
||||||
pub type ResolvedElementRef<'a, D = ()> = cstree::ResolvedElementRef<'a, TestLang, D>;
|
pub type ResolvedElementRef<'a, D = ()> = cstree::syntax::ResolvedElementRef<'a, TestLang, D>;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Element<'s> {
|
pub enum Element<'s> {
|
||||||
|
@ -26,13 +30,13 @@ pub enum Element<'s> {
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||||
pub enum TestLang {}
|
pub enum TestLang {}
|
||||||
impl Language for TestLang {
|
impl Language for TestLang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = RawSyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
raw
|
raw
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
kind
|
kind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +45,7 @@ impl Language for TestLang {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
|
pub fn build_tree_with_cache<I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
|
||||||
where
|
where
|
||||||
I: Interner,
|
I: Interner,
|
||||||
{
|
{
|
||||||
|
@ -52,25 +56,21 @@ where
|
||||||
node
|
node
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_recursive<'c, 'i, L, I>(
|
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
|
||||||
root: &Element<'_>,
|
|
||||||
builder: &mut GreenNodeBuilder<'c, 'i, L, I>,
|
|
||||||
mut from: u16,
|
|
||||||
) -> u16
|
|
||||||
where
|
where
|
||||||
L: Language<Kind = SyntaxKind>,
|
L: Language<Kind = RawSyntaxKind>,
|
||||||
I: Interner,
|
I: Interner,
|
||||||
{
|
{
|
||||||
match root {
|
match root {
|
||||||
Element::Node(children) => {
|
Element::Node(children) => {
|
||||||
builder.start_node(SyntaxKind(from));
|
builder.start_node(RawSyntaxKind(from));
|
||||||
for child in children {
|
for child in children {
|
||||||
from = build_recursive(child, builder, from + 1);
|
from = build_recursive(child, builder, from + 1);
|
||||||
}
|
}
|
||||||
builder.finish_node();
|
builder.finish_node();
|
||||||
}
|
}
|
||||||
Element::Token(text) => {
|
Element::Token(text) => {
|
||||||
builder.token(SyntaxKind(from), *text);
|
builder.token(RawSyntaxKind(from), text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
from
|
from
|
||||||
|
|
|
@ -3,7 +3,7 @@ fn empty_tree_arc() {
|
||||||
// this test is not here for the test itself, but to run it through MIRI, who complained about out-of-bound
|
// this test is not here for the test itself, but to run it through MIRI, who complained about out-of-bound
|
||||||
// `ThinArc` pointers for a root `GreenNode` with no children
|
// `ThinArc` pointers for a root `GreenNode` with no children
|
||||||
|
|
||||||
use cstree::*;
|
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u16)]
|
||||||
|
@ -16,13 +16,13 @@ fn empty_tree_arc() {
|
||||||
// ...
|
// ...
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= SyntaxKind::Root as u16);
|
assert!(raw.0 <= SyntaxKind::Root as u16);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
cstree::SyntaxKind(kind as u16)
|
cstree::RawSyntaxKind(kind as u16)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
|
|
@ -4,13 +4,29 @@ use crossbeam_utils::thread::scope;
|
||||||
use std::{thread, time::Duration};
|
use std::{thread, time::Duration};
|
||||||
|
|
||||||
use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang};
|
use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang};
|
||||||
use cstree::{interning::IntoResolver, GreenNodeBuilder};
|
use cstree::build::GreenNodeBuilder;
|
||||||
|
|
||||||
|
// Excercise the multi-threaded interner when the corresponding feature is enabled.
|
||||||
|
|
||||||
|
#[cfg(feature = "multi_threaded_interning")]
|
||||||
|
use cstree::interning::{new_threaded_interner, MultiThreadedTokenInterner};
|
||||||
|
|
||||||
|
#[cfg(not(feature = "multi_threaded_interning"))]
|
||||||
|
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang> {
|
||||||
|
GreenNodeBuilder::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "multi_threaded_interning")]
|
||||||
|
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang, MultiThreadedTokenInterner> {
|
||||||
|
let interner = new_threaded_interner();
|
||||||
|
GreenNodeBuilder::from_interner(interner)
|
||||||
|
}
|
||||||
|
|
||||||
fn build_tree<D>(root: &Element<'_>) -> ResolvedNode<D> {
|
fn build_tree<D>(root: &Element<'_>) -> ResolvedNode<D> {
|
||||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
let mut builder = get_builder();
|
||||||
build_recursive(root, &mut builder, 0);
|
build_recursive(root, &mut builder, 0);
|
||||||
let (node, cache) = builder.finish();
|
let (node, cache) = builder.finish();
|
||||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
|
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn two_level_tree() -> Element<'static> {
|
fn two_level_tree() -> Element<'static> {
|
||||||
|
|
|
@ -2,8 +2,9 @@ use crate::{build_recursive, build_tree_with_cache, ResolvedNode};
|
||||||
|
|
||||||
use super::{Element, SyntaxNode, TestLang};
|
use super::{Element, SyntaxNode, TestLang};
|
||||||
use cstree::{
|
use cstree::{
|
||||||
interning::{new_interner, IntoResolver},
|
build::{GreenNodeBuilder, NodeCache},
|
||||||
GreenNodeBuilder, NodeCache, NodeOrToken,
|
interning::new_interner,
|
||||||
|
util::NodeOrToken,
|
||||||
};
|
};
|
||||||
use serde_test::Token;
|
use serde_test::Token;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
@ -227,7 +228,7 @@ fn build_tree(root: Element<'_>) -> ResolvedNode<String> {
|
||||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||||
build_recursive(&root, &mut builder, 0);
|
build_recursive(&root, &mut builder, 0);
|
||||||
let (node, cache) = builder.finish();
|
let (node, cache) = builder.finish();
|
||||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
|
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attach_data(node: &SyntaxNode<String>) {
|
fn attach_data(node: &SyntaxNode<String>) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue