mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 09:07:44 +00:00
Set up a module structure (#44)
This commit is contained in:
parent
baa0a9f2f0
commit
16f7a3bd80
38 changed files with 2291 additions and 454 deletions
20
.github/workflows/main.yml
vendored
20
.github/workflows/main.yml
vendored
|
@ -31,8 +31,19 @@ jobs:
|
|||
- uses: hecrj/setup-rust-action@v1
|
||||
with:
|
||||
rust-version: ${{ matrix.rust }}
|
||||
- run: cargo test --verbose --all-features
|
||||
- run: cargo test --release --verbose --all-features
|
||||
|
||||
- uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --all-targets --verbose
|
||||
- uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --all-targets --verbose --all-features
|
||||
- uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --all-targets --verbose --all-features --release
|
||||
|
||||
check:
|
||||
name: Check
|
||||
|
@ -48,6 +59,7 @@ jobs:
|
|||
- uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: check
|
||||
args: --all-targets --all-features
|
||||
|
||||
clippy:
|
||||
name: Clippy
|
||||
|
@ -79,11 +91,13 @@ jobs:
|
|||
name: Check doc links
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
RUSTDOCFLAGS: -Dwarnings
|
||||
RUSTDOCFLAGS: -Dwarnings --cfg doc_cfg
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: hecrj/setup-rust-action@v1
|
||||
with:
|
||||
rust-version: nightly
|
||||
- run: cargo doc --all-features --document-private-items --no-deps
|
||||
|
||||
miri-test:
|
||||
|
|
45
CHANGELOG.md
45
CHANGELOG.md
|
@ -2,7 +2,52 @@
|
|||
|
||||
## `v0.12.0`
|
||||
|
||||
* Documentation has been improved in most areas, together with a switch to a more principled module structure that allows explicitly documenting submodules.
|
||||
* The `interning` module has been rewritten. It now provides functions for obtaining a default interner (`new_interner` and `new_threaded_interner`) and provides a small, dependency-free interner implementation.
|
||||
* Compatibility with other interners can be enabled via feature flags.
|
||||
* **Note** that compatibility with `lasso` is not enabled by default. Use the `lasso_compat` feature to match the previous default.
|
||||
* Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens).
|
||||
* Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens.
|
||||
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
||||
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
||||
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
|
||||
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
|
||||
* `cstree`
|
||||
* `Language`
|
||||
* `RawSyntaxKind`
|
||||
* `build`
|
||||
* `GreenNodeBuilder`
|
||||
* `NodeCache`
|
||||
* `Checkpoint`
|
||||
* `green`
|
||||
* `GreenNode`
|
||||
* `GreenToken`
|
||||
* `GreenNodeChildren`
|
||||
* `syntax`
|
||||
* `{Syntax,Resolved}Node`
|
||||
* `{Syntax,Resolved}Token`
|
||||
* `{Syntax,Resolved}Element`
|
||||
* `{Syntax,Resolved}ElementRef`
|
||||
* `SyntaxNodeChildren`
|
||||
* `SyntaxElementChildren`
|
||||
* `SyntaxText`
|
||||
* `interning`
|
||||
* `TokenKey` and the `InternKey` trait
|
||||
* `Interner` and `Resolver` traits
|
||||
* `new_interner` and `TokenInterner`
|
||||
* `new_threaded_interner` and `MultiThreadedTokenInterner` (with the `multi_threaded_interning` feature enabled)
|
||||
* compatibility implementations for interning crates depending on selected feature flags
|
||||
* `text`
|
||||
* `TextSize`
|
||||
* `TextRange`
|
||||
* `SyntaxText` (re-export)
|
||||
* `traversal`
|
||||
* `Direction`
|
||||
* `WalkEvent`
|
||||
* `util`
|
||||
* `NodeOrToken`
|
||||
* `TokenAtOffset`
|
||||
* `sync`
|
||||
* `Arc`
|
||||
* `prelude`
|
||||
* re-exports of the most-used items
|
38
Cargo.toml
38
Cargo.toml
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
edition = "2021"
|
||||
name = "cstree"
|
||||
version = "0.11.1"
|
||||
version = "0.12.0-rc.0" # when updating, also update `#![doc(html_root_url)]`
|
||||
authors = [
|
||||
"Domenic Quirl <DomenicQuirl@pm.me>",
|
||||
"Aleksey Kladov <aleksey.kladov@gmail.com>",
|
||||
|
@ -15,7 +15,6 @@ readme = "README.md"
|
|||
debug = true
|
||||
|
||||
[dependencies]
|
||||
lasso = { version = "0.6", features = ["inline-more", "multi-threaded"] }
|
||||
text-size = "1.1.0"
|
||||
fxhash = "0.2.1"
|
||||
parking_lot = "0.11.2"
|
||||
|
@ -24,6 +23,20 @@ parking_lot = "0.11.2"
|
|||
triomphe = "0.1.7"
|
||||
sptr = "0.3.2"
|
||||
|
||||
# Default Interner
|
||||
indexmap = "1.9"
|
||||
|
||||
[dependencies.lasso]
|
||||
version = "0.6"
|
||||
features = ["inline-more"]
|
||||
optional = true
|
||||
|
||||
[dependencies.salsa]
|
||||
git = "https://github.com/salsa-rs/salsa/"
|
||||
version = "0.1"
|
||||
optional = true
|
||||
package = "salsa-2022"
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1.0"
|
||||
optional = true
|
||||
|
@ -43,7 +56,24 @@ harness = false
|
|||
|
||||
[features]
|
||||
default = []
|
||||
serialize = ["serde", "lasso/serialize"]
|
||||
# Implementations of `serde::{De,}Serialize` for CSTrees.
|
||||
serialize = ["serde", "lasso?/serialize"]
|
||||
# Interoperability with the `lasso` interning crate.
|
||||
# When enabled, `cstree`'s default interners will use `lasso` internally, too.
|
||||
lasso_compat = ["lasso"]
|
||||
# Additionally provide threadsafe interner types.
|
||||
# Where applicable (and if the corresponding features are selected), provide compatibility
|
||||
# implementations for multi-thread interners from other crates.
|
||||
multi_threaded_interning = ["lasso_compat", "lasso/multi-threaded"]
|
||||
# Interoperability with the `salsa` framework for incremental computation.
|
||||
# Use this feature for "Salsa 2022".
|
||||
# WARNING: This feature is considered unstable!
|
||||
salsa_2022_compat = ["salsa"]
|
||||
|
||||
[[example]]
|
||||
name = "salsa"
|
||||
required-features = ["salsa_2022_compat"]
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
features = ["serialize"]
|
||||
all-features = true
|
||||
rustdoc-args = ["--cfg", "doc_cfg"]
|
||||
|
|
287
README.md
287
README.md
|
@ -32,8 +32,291 @@ Notable differences of `cstree` compared to `rowan`:
|
|||
- Performance optimizations for tree traversal: persisting red nodes allows tree traversal methods to return references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
||||
|
||||
## Getting Started
|
||||
The main entry points for constructing syntax trees are `GreenNodeBuilder` and `SyntaxNode::new_root` for green and red trees respectively.
|
||||
See `examples/s_expressions` for a guided tutorial to `cstree`.
|
||||
|
||||
If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
|
||||
concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
|
||||
to happen to go from input text to a `cstree` syntax tree:
|
||||
|
||||
1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression")
|
||||
that you want to have in your syntax and implement `Language`
|
||||
|
||||
2. Create a `GreenNodeBuilder` and call `start_node`, `token` and `finish_node` from your parser
|
||||
|
||||
3. Call `SyntaxNode::new_root` or `SyntaxNode::new_root_with_resolver` with the resulting
|
||||
`GreenNode` to obtain a syntax tree that you can traverse
|
||||
|
||||
Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
|
||||
We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
|
||||
compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
|
||||
|
||||
### Defining the language
|
||||
First, we need to list the different parts of our language's grammar.
|
||||
We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||
The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
||||
representation.
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
enum SyntaxKind {
|
||||
/* Tokens */
|
||||
Int, // 42
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
LParen, // (
|
||||
RParen, // )
|
||||
/* Nodes */
|
||||
Expr,
|
||||
Root,
|
||||
}
|
||||
```
|
||||
|
||||
Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
|
||||
We only really need one type of node: expressions.
|
||||
Our syntax tree's root node will have the special kind `Root`, all other nodes will be
|
||||
expressions containing a sequence of arithmetic operations potentially involving further, nested
|
||||
expression nodes.
|
||||
|
||||
To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
|
||||
`#[repr(u16)]` that we added) by implementing the `Language` trait. We can also tell `cstree` about tokens that
|
||||
always have the same text through the `static_text` method on the trait. This is useful for the operators and
|
||||
parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
|
||||
other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Calculator;
|
||||
|
||||
impl Language for Calculator {
|
||||
// The tokens and nodes we just defined
|
||||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
// This just needs to be the inverse of `kind_to_raw`, but could also
|
||||
// be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||
match raw.0 {
|
||||
0 => SyntaxKind::Int,
|
||||
1 => SyntaxKind::Plus,
|
||||
2 => SyntaxKind::Minus,
|
||||
3 => SyntaxKind::LParen,
|
||||
4 => SyntaxKind::RParen,
|
||||
5 => SyntaxKind::Expr,
|
||||
6 => SyntaxKind::Root,
|
||||
n => panic!("Unknown raw syntax kind: {n}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
RawSyntaxKind(kind as u16)
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
match kind {
|
||||
SyntaxKind::Plus => Some("+"),
|
||||
SyntaxKind::Minus => Some("-"),
|
||||
SyntaxKind::LParen => Some("("),
|
||||
SyntaxKind::RParen => Some(")"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Parsing into a green tree
|
||||
With that out of the way, we can start writing the parser for our expressions.
|
||||
For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
|
||||
tokens:
|
||||
|
||||
```rust
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Token<'input> {
|
||||
// Note that number strings are not yet parsed into actual numbers,
|
||||
// we just remember the slice of the input that contains their digits
|
||||
Int(&'input str),
|
||||
Plus,
|
||||
Minus,
|
||||
LParen,
|
||||
RParen,
|
||||
// A special token that indicates that we have reached the end of the file
|
||||
EoF,
|
||||
}
|
||||
```
|
||||
|
||||
A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
|
||||
combination of `cstree` and the actual parser, which we define like this:
|
||||
|
||||
```rust
|
||||
pub struct Parser<'input> {
|
||||
// `Peekable` is a standard library iterator adapter that allows
|
||||
// looking ahead at the next item without removing it from the iterator yet
|
||||
lexer: Peekable<Lexer<'input>>,
|
||||
builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||
}
|
||||
|
||||
impl<'input> Parser<'input> {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
Self {
|
||||
// we get `peekable` from implementing `Iterator` on `Lexer`
|
||||
lexer: Lexer::new(input).peekable(),
|
||||
builder: GreenNodeBuilder::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||
self.lexer.next()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes for
|
||||
all elements in the language grammar will have the same type: `GreenNode` for the inner ("green")
|
||||
tree and `SyntaxNode` for the outer ("red") tree. Different kinds of nodes (and tokens) are
|
||||
differentiated by their `SyntaxKind` tag, which we defined above.
|
||||
|
||||
You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
|
||||
a typical recursive descent parser. With a more traditional AST, one would define different AST
|
||||
structs for struct or function definitions, statements, expressions and so on. Inside the
|
||||
parser, the components of any element, such as all fields of a struct or all statements inside a
|
||||
function, are parsed first and then the parser wraps them in the matching AST type, which is
|
||||
returned from the corresponding parser function.
|
||||
|
||||
Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the parser
|
||||
would build. Instead, parsing into a CST using the `GreenNodeBuilder` follows the source code more
|
||||
closely in that you tell `cstree` about each new element you enter and all tokens that the parser
|
||||
consumes. So, for example, to parse a struct definition the parser first "enters" the struct
|
||||
definition node, then parses the `struct` keyword and type name, then parses each field, and finally
|
||||
"finishes" parsing the struct node.
|
||||
|
||||
The most trivial example is the root node for our parser, which just creates a root node
|
||||
containing the whole expression (we could do without a specific root node if any expression was
|
||||
a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
|
||||
|
||||
```rust
|
||||
pub fn parse(&mut self) -> Result<(), String> {
|
||||
self.builder.start_node(SyntaxKind::Root);
|
||||
self.parse_expr()?;
|
||||
self.builder.finish_node();
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
As there isn't a static AST type to return, the parser is very flexible as to what is part of a
|
||||
node. In the previous example, if the user is adding a new field to the struct and has not yet
|
||||
typed the field's type, the CST node for the struct doesn't care if there is no child node for
|
||||
it. Similarly, if the user is deleting fields and the source code currently contains a leftover
|
||||
field name, this additional identifier can be a part of the struct node without any
|
||||
modifications to the syntax tree definition. This property is the key to why CSTs are such a
|
||||
good fit as a lossless input representation, which necessitates the syntax tree to mirror the
|
||||
user-specific layout of whitespace and comments around the AST items.
|
||||
|
||||
In the parser for our simple expression language, we'll also have to deal with the fact that,
|
||||
when we see a number the parser doesn't yet know whether there will be additional operations
|
||||
following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
|
||||
a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
|
||||
however, implies that when reaching the `+`, the parser would have to have already entered an
|
||||
expression node in order for the whole input to be part of the expression.
|
||||
|
||||
To get around this, `GreenNodeBuilder` provides the `checkpoint` method, which we can call to
|
||||
"remember" the current position in the input. For example, we can create a checkpoint before the
|
||||
parser parses the first `1`. Later, when it sees the following `+`, it can create an `Expr` node
|
||||
for the whole expression using `start_node_at`:
|
||||
|
||||
```rust
|
||||
fn parse_lhs(&mut self) -> Result<(), String> {
|
||||
// An expression may start either with a number, or with an opening parenthesis that is
|
||||
// the start of a parenthesized expression
|
||||
let next_token = *self.lexer.peek().unwrap();
|
||||
match next_token {
|
||||
Token::Int(n) => {
|
||||
self.bump();
|
||||
self.builder.token(SyntaxKind::Int, n);
|
||||
}
|
||||
Token::LParen => {
|
||||
// Wrap the grouped expression inside a node containing it and its parentheses
|
||||
self.builder.start_node(SyntaxKind::Expr);
|
||||
self.bump();
|
||||
self.builder.static_token(SyntaxKind::LParen);
|
||||
self.parse_expr()?; // Inner expression
|
||||
if self.bump() != Some(Token::RParen) {
|
||||
return Err("Missing ')'".to_string());
|
||||
}
|
||||
self.builder.static_token(SyntaxKind::RParen);
|
||||
self.builder.finish_node();
|
||||
}
|
||||
Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||
t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self) -> Result<(), String> {
|
||||
// Remember our current position
|
||||
let before_expr = self.builder.checkpoint();
|
||||
|
||||
// Parse the start of the expression
|
||||
self.parse_lhs()?;
|
||||
|
||||
// Check if the expression continues with `+ <more>` or `- <more>`
|
||||
let Some(next_token) = self.lexer.peek() else {
|
||||
return Ok(());
|
||||
};
|
||||
let op = match *next_token {
|
||||
Token::Plus => SyntaxKind::Plus,
|
||||
Token::Minus => SyntaxKind::Minus,
|
||||
Token::RParen | Token::EoF => return Ok(()),
|
||||
t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||
};
|
||||
|
||||
// If so, retroactively wrap the (already parsed) LHS and the following RHS
|
||||
// inside an `Expr` node
|
||||
self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||
self.bump();
|
||||
self.builder.static_token(op);
|
||||
self.parse_expr()?; // RHS
|
||||
self.builder.finish_node();
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
### Obtaining the parser result
|
||||
|
||||
Our parser is now capable of parsing our little arithmetic language, but its methods don't return
|
||||
anything. So how do we get our syntax tree out? The answer lies in `GreenNodeBuilder::finish`, which
|
||||
finally returns the tree that we have painstakingly constructed.
|
||||
|
||||
```rust
|
||||
impl Parser<'_> {
|
||||
pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||
assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||
let (tree, cache) = self.builder.finish();
|
||||
(tree, cache.unwrap().into_interner().unwrap())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use it
|
||||
for parsing related inputs (e.g., different source files from the same crate may share a lot of
|
||||
common function and type names that can be deduplicated). See `GreenNodeBuilder`'s documentation for
|
||||
more information on this, in particular the `with_cache` and `from_cache` methods. Most importantly
|
||||
for us, we can extract the `Interner` that contains the source text of the tree's tokens from the
|
||||
cache, which we need if we want to look up things like variable names or the value of numbers for
|
||||
our calculator.
|
||||
|
||||
To work with the syntax tree, you'll want to upgrade it to a `SyntaxNode` using
|
||||
`SyntaxNode::new_root`. You can also use `SyntaxNode::new_root_with_resolver` to combine tree and
|
||||
interner, which lets you directly retrieve source text and makes the nodes implement `Display` and
|
||||
`Debug`. The same output can be produced from `SyntaxNode`s by calling the `debug` or `display`
|
||||
method with a `Resolver`. To visualize the whole syntax tree, pass `true` for the `recursive`
|
||||
parameter on `debug`, or simply debug-print a `ResolvedNode`:
|
||||
|
||||
```rust
|
||||
let input = "11 + 2-(5 + 4)";
|
||||
let mut parser = Parser::new(input);
|
||||
parser.parse().unwrap();
|
||||
let (tree, interner) = parser.finish();
|
||||
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||
dbg!(root);
|
||||
```
|
||||
|
||||
## AST Layer
|
||||
While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or an AST representation, or freely switch between them.
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||
use cstree::*;
|
||||
use lasso::{Interner, Rodeo};
|
||||
use cstree::{
|
||||
build::*,
|
||||
green::GreenNode,
|
||||
interning::{new_interner, Interner},
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
use std::{fmt, hash::Hash};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -40,7 +44,7 @@ impl Bool for UseStaticText {
|
|||
impl<T: Bool> Language for TestLang<T> {
|
||||
type Kind = TestKind;
|
||||
|
||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
if raw.0 == u16::MAX - 1 {
|
||||
TestKind::Plus
|
||||
} else {
|
||||
|
@ -48,10 +52,10 @@ impl<T: Bool> Language for TestLang<T> {
|
|||
}
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
match kind {
|
||||
TestKind::Element { n } => SyntaxKind(n),
|
||||
TestKind::Plus => SyntaxKind(u16::MAX - 1),
|
||||
TestKind::Element { n } => RawSyntaxKind(n),
|
||||
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,7 +71,7 @@ impl<T: Bool> Language for TestLang<T> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn build_tree_with_cache<'c, 'i, T: Bool, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
|
||||
pub fn build_tree_with_cache<T: Bool, I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
|
||||
where
|
||||
I: Interner,
|
||||
{
|
||||
|
@ -78,9 +82,9 @@ where
|
|||
node
|
||||
}
|
||||
|
||||
pub fn build_recursive<'c, 'i, T: Bool, I>(
|
||||
pub fn build_recursive<T: Bool, I>(
|
||||
root: &Element<'_>,
|
||||
builder: &mut GreenNodeBuilder<'c, 'i, TestLang<T>, I>,
|
||||
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
|
||||
mut from: u16,
|
||||
) -> u16
|
||||
where
|
||||
|
@ -95,7 +99,7 @@ where
|
|||
builder.finish_node();
|
||||
}
|
||||
Element::Token(text) => {
|
||||
builder.token(TestKind::Element { n: from }, *text);
|
||||
builder.token(TestKind::Element { n: from }, text);
|
||||
}
|
||||
Element::Plus => {
|
||||
builder.token(TestKind::Plus, "+");
|
||||
|
@ -114,10 +118,15 @@ fn two_level_tree() -> Element<'static> {
|
|||
}
|
||||
|
||||
pub fn create(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("two-level tree");
|
||||
#[cfg(not(feature = "lasso_compat"))]
|
||||
const GROUP_NAME: &str = "two-level tree (default interner)";
|
||||
#[cfg(feature = "lasso_compat")]
|
||||
const GROUP_NAME: &str = "two-level tree (lasso)";
|
||||
|
||||
let mut group = c.benchmark_group(GROUP_NAME);
|
||||
group.throughput(Throughput::Elements(1));
|
||||
|
||||
let mut interner = Rodeo::new();
|
||||
let mut interner = new_interner();
|
||||
let mut cache = NodeCache::with_interner(&mut interner);
|
||||
let tree = two_level_tree();
|
||||
|
||||
|
|
|
@ -13,10 +13,7 @@
|
|||
//! - "+" Token(Add)
|
||||
//! - "4" Token(Number)
|
||||
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder, NodeOrToken,
|
||||
};
|
||||
use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
|
||||
use std::iter::Peekable;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
|
@ -36,7 +33,7 @@ enum SyntaxKind {
|
|||
}
|
||||
use SyntaxKind::*;
|
||||
|
||||
impl From<SyntaxKind> for cstree::SyntaxKind {
|
||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||
fn from(kind: SyntaxKind) -> Self {
|
||||
Self(kind as u16)
|
||||
}
|
||||
|
@ -47,12 +44,12 @@ enum Lang {}
|
|||
impl cstree::Language for Lang {
|
||||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
kind.into()
|
||||
}
|
||||
|
||||
|
@ -67,12 +64,12 @@ impl cstree::Language for Lang {
|
|||
}
|
||||
}
|
||||
|
||||
type SyntaxNode = cstree::SyntaxNode<Lang>;
|
||||
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
|
||||
#[allow(unused)]
|
||||
type SyntaxToken = cstree::SyntaxToken<Lang>;
|
||||
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
|
||||
#[allow(unused)]
|
||||
type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
|
||||
type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
|
||||
type SyntaxElement = cstree::util::NodeOrToken<SyntaxNode, SyntaxToken>;
|
||||
type SyntaxElementRef<'a> = cstree::util::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
|
||||
|
||||
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
|
||||
builder: GreenNodeBuilder<'static, 'static, Lang>,
|
||||
|
@ -128,10 +125,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
|
|||
self.builder.finish_node();
|
||||
|
||||
let (tree, cache) = self.builder.finish();
|
||||
(
|
||||
SyntaxNode::new_root(tree),
|
||||
cache.unwrap().into_interner().unwrap().into_resolver(),
|
||||
)
|
||||
(SyntaxNode::new_root(tree), cache.unwrap().into_interner().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
334
examples/readme.rs
Normal file
334
examples/readme.rs
Normal file
|
@ -0,0 +1,334 @@
|
|||
use std::{io::Write, iter::Peekable};
|
||||
|
||||
use cstree::{
|
||||
interning::Interner,
|
||||
prelude::*,
|
||||
syntax::{ResolvedElementRef, ResolvedNode},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
pub enum SyntaxKind {
|
||||
/* Tokens */
|
||||
Int, // 42
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
LParen, // (
|
||||
RParen, // )
|
||||
/* Nodes */
|
||||
Expr,
|
||||
Root,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Calculator;
|
||||
impl Language for Calculator {
|
||||
// The tokens and nodes we just defined
|
||||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
// This just needs to be the inverse of `kind_to_raw`, but could also
|
||||
// be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||
match raw.0 {
|
||||
0 => SyntaxKind::Int,
|
||||
1 => SyntaxKind::Plus,
|
||||
2 => SyntaxKind::Minus,
|
||||
3 => SyntaxKind::LParen,
|
||||
4 => SyntaxKind::RParen,
|
||||
5 => SyntaxKind::Expr,
|
||||
6 => SyntaxKind::Root,
|
||||
n => panic!("Unknown raw syntax kind: {n}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
RawSyntaxKind(kind as u16)
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
match kind {
|
||||
SyntaxKind::Plus => Some("+"),
|
||||
SyntaxKind::Minus => Some("-"),
|
||||
SyntaxKind::LParen => Some("("),
|
||||
SyntaxKind::RParen => Some(")"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum Token<'input> {
|
||||
Int(&'input str),
|
||||
Plus,
|
||||
Minus,
|
||||
LParen,
|
||||
RParen,
|
||||
EoF,
|
||||
}
|
||||
|
||||
pub struct Lexer<'input> {
|
||||
input: &'input str,
|
||||
at_eof: bool,
|
||||
}
|
||||
|
||||
impl<'input> Lexer<'input> {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
Self { input, at_eof: false }
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Result<Token<'input>, String> {
|
||||
loop {
|
||||
let Some(next_char) = self.input.chars().next() else {
|
||||
self.at_eof = true;
|
||||
return Ok(Token::EoF);
|
||||
};
|
||||
|
||||
let token = match next_char {
|
||||
'+' => Token::Plus,
|
||||
'-' => Token::Minus,
|
||||
'(' => Token::LParen,
|
||||
')' => Token::RParen,
|
||||
c if c.is_ascii_digit() => {
|
||||
let (last_digit_idx, _char) = self
|
||||
.input
|
||||
.char_indices()
|
||||
.take_while(|(_idx, c)| c.is_ascii_digit())
|
||||
.last()
|
||||
.expect("matched at least one");
|
||||
// Advance lexer
|
||||
let number = Token::Int(&self.input[..=last_digit_idx]);
|
||||
self.input = &self.input[(last_digit_idx + 1)..];
|
||||
return Ok(number);
|
||||
}
|
||||
c if c.is_whitespace() => {
|
||||
// Skip whitespace
|
||||
let (last_ws_idx, _char) = self
|
||||
.input
|
||||
.char_indices()
|
||||
.take_while(|(_idx, c)| c.is_whitespace())
|
||||
.last()
|
||||
.expect("matched at least one");
|
||||
// Advance lexer
|
||||
self.input = &self.input[(last_ws_idx + 1)..];
|
||||
continue;
|
||||
}
|
||||
c => return Err(format!("Unknown start of token: '{c}'")),
|
||||
};
|
||||
|
||||
// Advance lexer
|
||||
self.input = &self.input[1..];
|
||||
return Ok(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'input> Iterator for Lexer<'input> {
|
||||
type Item = Token<'input>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.at_eof {
|
||||
None
|
||||
} else {
|
||||
Some(self.next_token().expect("Failed to lex input"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'input> {
|
||||
lexer: Peekable<Lexer<'input>>,
|
||||
builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||
}
|
||||
|
||||
impl<'input> Parser<'input> {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
Self {
|
||||
lexer: Lexer::new(input).peekable(),
|
||||
builder: GreenNodeBuilder::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||
self.lexer.next()
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> Result<(), String> {
|
||||
self.builder.start_node(SyntaxKind::Root);
|
||||
self.parse_expr()?;
|
||||
self.builder.finish_node();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_lhs(&mut self) -> Result<(), String> {
|
||||
// An expression may start either with a number, or with an opening parenthesis that is the start of a
|
||||
// parenthesized expression
|
||||
let next_token = *self.lexer.peek().unwrap();
|
||||
match next_token {
|
||||
Token::Int(n) => {
|
||||
self.bump();
|
||||
self.builder.token(SyntaxKind::Int, n);
|
||||
}
|
||||
Token::LParen => {
|
||||
// Wrap the grouped expression inside a node containing it and its parentheses
|
||||
self.builder.start_node(SyntaxKind::Expr);
|
||||
self.bump();
|
||||
self.builder.static_token(SyntaxKind::LParen);
|
||||
self.parse_expr()?; // Inner expression
|
||||
if self.bump() != Some(Token::RParen) {
|
||||
return Err("Missing ')'".to_string());
|
||||
}
|
||||
self.builder.static_token(SyntaxKind::RParen);
|
||||
self.builder.finish_node();
|
||||
}
|
||||
Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||
t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self) -> Result<(), String> {
|
||||
// Remember our current position
|
||||
let before_expr = self.builder.checkpoint();
|
||||
|
||||
// Parse the start of the expression
|
||||
self.parse_lhs()?;
|
||||
|
||||
// Check if the expression continues with `+ <more>` or `- <more>`
|
||||
let Some(next_token) = self.lexer.peek() else {
|
||||
return Ok(());
|
||||
};
|
||||
let op = match *next_token {
|
||||
Token::Plus => SyntaxKind::Plus,
|
||||
Token::Minus => SyntaxKind::Minus,
|
||||
Token::RParen | Token::EoF => return Ok(()),
|
||||
t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||
};
|
||||
|
||||
// If so, retroactively wrap the (already parsed) LHS and the following RHS inside an `Expr` node
|
||||
self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||
self.bump();
|
||||
self.builder.static_token(op);
|
||||
self.parse_expr()?; // RHS
|
||||
self.builder.finish_node();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||
assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||
let (tree, cache) = self.builder.finish();
|
||||
(tree, cache.unwrap().into_interner().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
use std::io;
|
||||
|
||||
let mut buf = String::new();
|
||||
loop {
|
||||
print!("Enter expression: ");
|
||||
io::stdout().flush().unwrap();
|
||||
buf.clear();
|
||||
if let Err(e) = io::stdin().read_line(&mut buf) {
|
||||
eprintln!("Error reading input: {e}");
|
||||
continue;
|
||||
}
|
||||
let mut parser = Parser::new(&buf);
|
||||
if let Err(e) = parser.parse() {
|
||||
eprintln!("Parse error: {e}");
|
||||
continue;
|
||||
}
|
||||
|
||||
let (tree, interner) = parser.finish();
|
||||
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||
|
||||
if let Some(expr) = root.first_child_or_token() {
|
||||
let result = eval_elem(expr, &mut root.children_with_tokens());
|
||||
println!("Result: {result}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eval(expr: &ResolvedNode<Calculator>) -> i64 {
|
||||
let mut children = expr.children_with_tokens();
|
||||
let lhs = eval_elem(children.next().expect("empty expr"), &mut children);
|
||||
let Some(op) = children.next().map(|elem| elem.kind()) else {
|
||||
// Literal expression
|
||||
return lhs;
|
||||
};
|
||||
let rhs = eval_elem(children.next().expect("missing RHS"), &mut children);
|
||||
|
||||
match op {
|
||||
SyntaxKind::Plus => lhs + rhs,
|
||||
SyntaxKind::Minus => lhs - rhs,
|
||||
_ => unreachable!("invalid op"),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_elem<'e>(
|
||||
expr: ResolvedElementRef<'_, Calculator>,
|
||||
children: &mut impl Iterator<Item = ResolvedElementRef<'e, Calculator>>,
|
||||
) -> i64 {
|
||||
use cstree::util::NodeOrToken;
|
||||
|
||||
match expr {
|
||||
NodeOrToken::Node(n) => {
|
||||
assert_eq!(n.kind(), SyntaxKind::Expr);
|
||||
eval(n)
|
||||
}
|
||||
NodeOrToken::Token(t) => match t.kind() {
|
||||
SyntaxKind::Int => {
|
||||
let number_str = t.text();
|
||||
number_str.parse().expect("parsed int could not be evaluated")
|
||||
}
|
||||
SyntaxKind::LParen => {
|
||||
let inner = children.next().expect("missing content inside parens");
|
||||
// It's important that we consume the `)` here, as otherwise `eval` might mistake it for an operator
|
||||
assert_eq!(
|
||||
children
|
||||
.next()
|
||||
.and_then(|elem| elem.into_token())
|
||||
.map(|token| token.kind()),
|
||||
Some(SyntaxKind::RParen)
|
||||
);
|
||||
eval_elem(inner, children)
|
||||
}
|
||||
_ => unreachable!("invalid start of expression"),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn lex() {
|
||||
let input = "11 + 2-(5 + 4)";
|
||||
let lexer = Lexer::new(input);
|
||||
let tokens: Vec<_> = lexer.into_iter().collect();
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Token::Int("11"),
|
||||
Token::Plus,
|
||||
Token::Int("2"),
|
||||
Token::Minus,
|
||||
Token::LParen,
|
||||
Token::Int("5"),
|
||||
Token::Plus,
|
||||
Token::Int("4"),
|
||||
Token::RParen,
|
||||
Token::EoF
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
let input = "11 + 2-(5 + 4)";
|
||||
let mut parser = Parser::new(input);
|
||||
parser.parse().unwrap();
|
||||
let (tree, interner) = parser.finish();
|
||||
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||
dbg!(root);
|
||||
}
|
||||
}
|
|
@ -30,7 +30,7 @@ use SyntaxKind::*;
|
|||
/// in order to not need the user's `enum SyntaxKind` as a type parameter.
|
||||
///
|
||||
/// First, to easily pass the enum variants into cstree via `.into()`:
|
||||
impl From<SyntaxKind> for cstree::SyntaxKind {
|
||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||
fn from(kind: SyntaxKind) -> Self {
|
||||
Self(kind as u16)
|
||||
}
|
||||
|
@ -44,12 +44,12 @@ pub enum Lang {}
|
|||
impl cstree::Language for Lang {
|
||||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
kind.into()
|
||||
}
|
||||
|
||||
|
@ -66,14 +66,11 @@ impl cstree::Language for Lang {
|
|||
/// offsets and parent pointers.
|
||||
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
||||
/// the Resolver to get the real text back from the interned representation.
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNode, Language,
|
||||
};
|
||||
use cstree::{green::GreenNode, interning::Resolver, Language};
|
||||
|
||||
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
||||
/// a stack of currently in-progress nodes.
|
||||
use cstree::GreenNodeBuilder;
|
||||
use cstree::build::GreenNodeBuilder;
|
||||
|
||||
/// The parse results are stored as a "green tree".
|
||||
/// We'll discuss how to work with the results later.
|
||||
|
@ -135,7 +132,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
|
|||
let (tree, cache) = self.builder.finish();
|
||||
Parse {
|
||||
green_node: tree,
|
||||
resolver: cache.unwrap().into_interner().unwrap().into_resolver(),
|
||||
resolver: cache.unwrap().into_interner().unwrap(),
|
||||
errors: self.errors,
|
||||
}
|
||||
}
|
||||
|
@ -213,11 +210,11 @@ fn parse(text: &str) -> Parse<impl Resolver> {
|
|||
/// To work with the parse results we need a view into the green tree - the syntax tree.
|
||||
/// It is also immutable, like a GreenNode, but it contains parent pointers, offsets, and has
|
||||
/// identity semantics.
|
||||
type SyntaxNode = cstree::SyntaxNode<Lang>;
|
||||
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
|
||||
#[allow(unused)]
|
||||
type SyntaxToken = cstree::SyntaxToken<Lang>;
|
||||
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
|
||||
#[allow(unused)]
|
||||
type SyntaxElement = cstree::SyntaxElement<Lang>;
|
||||
type SyntaxElement = cstree::syntax::SyntaxElement<Lang>;
|
||||
|
||||
impl<I> Parse<I> {
|
||||
fn syntax(&self) -> SyntaxNode {
|
||||
|
@ -355,8 +352,10 @@ impl ast::Atom {
|
|||
}
|
||||
|
||||
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
|
||||
match &self.0.green().children().next() {
|
||||
Some(cstree::NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
|
||||
use cstree::util::NodeOrToken;
|
||||
|
||||
match self.0.green().children().next() {
|
||||
Some(NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
|
||||
.or_else(|| token.text(resolver))
|
||||
.unwrap(),
|
||||
_ => unreachable!(),
|
||||
|
@ -422,7 +421,7 @@ nan
|
|||
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
||||
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
||||
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
||||
m_lexer::TokenKind(cstree::SyntaxKind::from(t).0)
|
||||
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
|
||||
}
|
||||
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
||||
match t.0 {
|
||||
|
|
50
examples/salsa.rs
Normal file
50
examples/salsa.rs
Normal file
|
@ -0,0 +1,50 @@
|
|||
#![cfg(feature = "salsa_2022_compat")]
|
||||
|
||||
use cstree::{build::GreenNodeBuilder, impl_cstree_interning_for_salsa};
|
||||
|
||||
#[salsa::jar(db = Db)]
|
||||
pub struct Jar(crate::SourceId);
|
||||
|
||||
pub trait Db: salsa::DbWithJar<Jar> {}
|
||||
impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
||||
|
||||
#[salsa::interned]
|
||||
pub struct SourceId {
|
||||
#[return_ref]
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
#[salsa::db(crate::Jar)]
|
||||
struct Database {
|
||||
storage: salsa::Storage<Self>,
|
||||
}
|
||||
|
||||
impl salsa::Database for Database {}
|
||||
|
||||
impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
|
||||
|
||||
use cstree::{syntax::SyntaxNode, testing::*};
|
||||
|
||||
fn main() {
|
||||
let db = Database::default();
|
||||
let interned = SourceId::new(&db, "foo".to_string());
|
||||
let original = interned.text(&db);
|
||||
assert_eq!(original, "foo");
|
||||
|
||||
let interner = db.as_interner();
|
||||
let mut shared_interner = &interner;
|
||||
let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
|
||||
let (tree, _no_interner_because_it_was_borrowed) = {
|
||||
builder.start_node(TestSyntaxKind::Plus);
|
||||
builder.token(TestSyntaxKind::Float, "2.05");
|
||||
builder.token(TestSyntaxKind::Whitespace, " ");
|
||||
builder.token(TestSyntaxKind::Plus, "+");
|
||||
builder.token(TestSyntaxKind::Whitespace, " ");
|
||||
builder.token(TestSyntaxKind::Float, "7.32");
|
||||
builder.finish_node();
|
||||
builder.finish()
|
||||
};
|
||||
let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
|
||||
assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
|
||||
}
|
19
src/green.rs
19
src/green.rs
|
@ -1,10 +1,9 @@
|
|||
//! Implementation of the inner, "green" tree.
|
||||
//! The [`GreenNodeBuilder`] is the main entry point to constructing [`GreenNode`]s and
|
||||
//! [`GreenToken`]s.
|
||||
//! The [`GreenNodeBuilder`](crate::build::GreenNodeBuilder) from the [`build` module](crate::build) is the main entry
|
||||
//! point to constructing [`GreenNode`]s and [`GreenToken`]s.
|
||||
|
||||
mod builder;
|
||||
pub(super) mod builder;
|
||||
mod element;
|
||||
mod interner;
|
||||
mod iter;
|
||||
mod node;
|
||||
mod token;
|
||||
|
@ -12,17 +11,7 @@ mod token;
|
|||
pub(crate) use self::element::GreenElementRef;
|
||||
use self::element::{GreenElement, PackedGreenElement};
|
||||
|
||||
pub use self::{
|
||||
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
|
||||
interner::TokenInterner,
|
||||
iter::GreenNodeChildren,
|
||||
node::GreenNode,
|
||||
token::GreenToken,
|
||||
};
|
||||
|
||||
/// SyntaxKind is a type tag for each token or node.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct SyntaxKind(pub u16);
|
||||
pub use self::{iter::GreenNodeChildren, node::GreenNode, token::GreenToken};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
|
@ -4,10 +4,11 @@ use fxhash::{FxHashMap, FxHasher32};
|
|||
use text_size::TextSize;
|
||||
|
||||
use crate::{
|
||||
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
|
||||
interning::{Interner, Key},
|
||||
green::{GreenElement, GreenNode, GreenToken},
|
||||
interning::{new_interner, Interner, TokenInterner, TokenKey},
|
||||
util::NodeOrToken,
|
||||
utility_types::MaybeOwned,
|
||||
Language, NodeOrToken,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
|
||||
use super::{node::GreenNodeHead, token::GreenTokenData};
|
||||
|
@ -35,6 +36,8 @@ impl NodeCache<'static> {
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::{*, Language as _};
|
||||
/// use cstree::build::NodeCache;
|
||||
///
|
||||
/// // Build a tree
|
||||
/// let mut cache = NodeCache::new();
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::with_cache(&mut cache);
|
||||
|
@ -53,7 +56,7 @@ impl NodeCache<'static> {
|
|||
Self {
|
||||
nodes: FxHashMap::default(),
|
||||
tokens: FxHashMap::default(),
|
||||
interner: MaybeOwned::Owned(TokenInterner::new()),
|
||||
interner: MaybeOwned::Owned(new_interner()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,19 +69,21 @@ impl Default for NodeCache<'static> {
|
|||
|
||||
impl<'i, I> NodeCache<'i, I>
|
||||
where
|
||||
I: Interner,
|
||||
I: Interner<TokenKey>,
|
||||
{
|
||||
/// Constructs a new, empty cache that will use the given interner to deduplicate source text
|
||||
/// (strings) across tokens.
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::{*, Language as _};
|
||||
/// use lasso::Rodeo;
|
||||
/// # use cstree::interning::*;
|
||||
/// use cstree::build::NodeCache;
|
||||
///
|
||||
/// // Create the builder from a custom `Rodeo`
|
||||
/// let mut interner = Rodeo::new();
|
||||
/// // Create the builder from a custom interner
|
||||
/// let mut interner = new_interner();
|
||||
/// let mut cache = NodeCache::with_interner(&mut interner);
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::with_cache(&mut cache);
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||
/// GreenNodeBuilder::with_cache(&mut cache);
|
||||
///
|
||||
/// // Construct the tree
|
||||
/// # builder.start_node(Root);
|
||||
|
@ -107,12 +112,14 @@ where
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::{*, Language as _};
|
||||
/// use lasso::Rodeo;
|
||||
/// # use cstree::interning::*;
|
||||
/// use cstree::build::NodeCache;
|
||||
///
|
||||
/// // Create the builder from a custom `Rodeo`
|
||||
/// let mut interner = Rodeo::new();
|
||||
/// // Create the builder from a custom interner
|
||||
/// let mut interner = new_interner();
|
||||
/// let cache = NodeCache::from_interner(interner);
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::from_cache(cache);
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||
/// GreenNodeBuilder::from_cache(cache);
|
||||
///
|
||||
/// // Construct the tree
|
||||
/// # builder.start_node(Root);
|
||||
|
@ -142,22 +149,23 @@ where
|
|||
/// See also [`interner_mut`](NodeCache::interner_mut).
|
||||
#[inline]
|
||||
pub fn interner(&self) -> &I {
|
||||
&*self.interner
|
||||
&self.interner
|
||||
}
|
||||
|
||||
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::*;
|
||||
/// # use cstree::build::*;
|
||||
/// # use cstree::interning::*;
|
||||
/// let mut cache = NodeCache::new();
|
||||
/// let interner = cache.interner_mut();
|
||||
/// let key = interner.get_or_intern("foo");
|
||||
/// assert_eq!(interner.resolve(&key), "foo");
|
||||
/// assert_eq!(interner.resolve(key), "foo");
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn interner_mut(&mut self) -> &mut I {
|
||||
&mut *self.interner
|
||||
&mut self.interner
|
||||
}
|
||||
|
||||
/// If this node cache was constructed with [`new`](NodeCache::new) or
|
||||
|
@ -196,7 +204,7 @@ where
|
|||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn intern(&mut self, text: &str) -> Key {
|
||||
fn intern(&mut self, text: &str) -> TokenKey {
|
||||
self.interner.get_or_intern(text)
|
||||
}
|
||||
|
||||
|
@ -205,7 +213,7 @@ where
|
|||
#[inline]
|
||||
fn get_cached_node(
|
||||
&mut self,
|
||||
kind: SyntaxKind,
|
||||
kind: RawSyntaxKind,
|
||||
children: std::vec::Drain<'_, GreenElement>,
|
||||
text_len: TextSize,
|
||||
child_hash: u32,
|
||||
|
@ -221,7 +229,7 @@ where
|
|||
.clone()
|
||||
}
|
||||
|
||||
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<Key>, len: u32) -> GreenToken {
|
||||
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<TokenKey>, len: u32) -> GreenToken {
|
||||
let text_len = TextSize::from(len);
|
||||
let kind = L::kind_to_raw(kind);
|
||||
let data = GreenTokenData { kind, text, text_len };
|
||||
|
@ -246,7 +254,6 @@ pub struct Checkpoint(usize);
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::{*, Language as _};
|
||||
/// # use cstree::interning::IntoResolver;
|
||||
/// // Build a tree
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||
/// builder.start_node(Root);
|
||||
|
@ -258,7 +265,7 @@ pub struct Checkpoint(usize);
|
|||
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
|
||||
/// let int = tree.children().next().unwrap();
|
||||
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
|
||||
/// let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
|
||||
/// let resolver = cache.unwrap().into_interner().unwrap();
|
||||
/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42"));
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
|
@ -288,7 +295,7 @@ impl<L: Language> Default for GreenNodeBuilder<'static, 'static, L> {
|
|||
impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I>
|
||||
where
|
||||
L: Language,
|
||||
I: Interner,
|
||||
I: Interner<TokenKey>,
|
||||
{
|
||||
/// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally
|
||||
/// share underlying trees.
|
||||
|
@ -306,6 +313,7 @@ where
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::{*, Language as _};
|
||||
/// # use cstree::build::*;
|
||||
/// // Construct a builder from our own cache
|
||||
/// let cache = NodeCache::new();
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::from_cache(cache);
|
||||
|
@ -358,7 +366,7 @@ where
|
|||
/// See also [`interner_mut`](GreenNodeBuilder::interner_mut).
|
||||
#[inline]
|
||||
pub fn interner(&self) -> &I {
|
||||
&*self.cache.interner
|
||||
&self.cache.interner
|
||||
}
|
||||
|
||||
/// Get a mutable reference to the interner used to deduplicate source text (strings).
|
||||
|
@ -367,20 +375,19 @@ where
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// # use cstree::build::*;
|
||||
/// # use cstree::interning::*;
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||
/// let interner = builder.interner_mut();
|
||||
/// let key = interner.get_or_intern("foo");
|
||||
/// assert_eq!(interner.resolve(&key), "foo");
|
||||
/// assert_eq!(interner.resolve(key), "foo");
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn interner_mut(&mut self) -> &mut I {
|
||||
&mut *self.cache.interner
|
||||
&mut self.cache.interner
|
||||
}
|
||||
|
||||
/// Add a new token to the current branch without storing an explicit section of text.
|
||||
/// This is be useful if the text can always be inferred from the token's `kind`, for example
|
||||
/// when using kinds for specific operators or punctuation.
|
||||
/// Add a new token with the given `text` to the current node.
|
||||
///
|
||||
/// ## Panics
|
||||
/// In debug mode, if `kind` has static text, this function will verify that `text` matches that text.
|
||||
|
@ -403,6 +410,22 @@ where
|
|||
self.children.push(token.into());
|
||||
}
|
||||
|
||||
/// Add a new token to the current node without storing an explicit section of text.
|
||||
/// This is be useful if the text can always be inferred from the token's `kind`, for example
|
||||
/// when using kinds for specific operators or punctuation.
|
||||
///
|
||||
/// For tokens whose textual representation is not static, such as numbers or identifiers, use
|
||||
/// [`token`](GreenNodeBuilder::token).
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `kind` does not have static text, i.e., `L::static_text(kind)` returns `None`.
|
||||
#[inline]
|
||||
pub fn static_token(&mut self, kind: L::Kind) {
|
||||
let static_text = L::static_text(kind).unwrap_or_else(|| panic!("Missing static text for '{kind:?}'"));
|
||||
let token = self.cache.token::<L>(kind, None, static_text.len() as u32);
|
||||
self.children.push(token.into());
|
||||
}
|
||||
|
||||
/// Start new node of the given `kind` and make it current.
|
||||
#[inline]
|
||||
pub fn start_node(&mut self, kind: L::Kind) {
|
||||
|
@ -427,7 +450,7 @@ where
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// # use cstree::{GreenNodeBuilder, Language};
|
||||
/// # use cstree::{build::GreenNodeBuilder, Language};
|
||||
/// # struct Parser;
|
||||
/// # impl Parser {
|
||||
/// # fn peek(&self) -> Option<TestSyntaxKind> { None }
|
||||
|
|
|
@ -7,8 +7,10 @@ type ErasedPtr = *const u8;
|
|||
use sptr::Strict;
|
||||
|
||||
use crate::{
|
||||
green::{GreenNode, GreenToken, SyntaxKind},
|
||||
NodeOrToken, TextSize,
|
||||
green::{GreenNode, GreenToken},
|
||||
text::TextSize,
|
||||
util::NodeOrToken,
|
||||
RawSyntaxKind,
|
||||
};
|
||||
|
||||
pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
|
||||
|
@ -64,7 +66,7 @@ impl From<GreenToken> for PackedGreenElement {
|
|||
impl GreenElement {
|
||||
/// Returns kind of this element.
|
||||
#[inline]
|
||||
pub fn kind(&self) -> SyntaxKind {
|
||||
pub fn kind(&self) -> RawSyntaxKind {
|
||||
self.as_ref().kind()
|
||||
}
|
||||
|
||||
|
@ -78,7 +80,7 @@ impl GreenElement {
|
|||
impl GreenElementRef<'_> {
|
||||
/// Returns kind of this element.
|
||||
#[inline]
|
||||
pub fn kind(&self) -> SyntaxKind {
|
||||
pub fn kind(&self) -> RawSyntaxKind {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.kind(),
|
||||
NodeOrToken::Token(it) => it.kind(),
|
||||
|
|
|
@ -1,126 +0,0 @@
|
|||
use std::num::NonZeroUsize;
|
||||
|
||||
use crate::interning::{
|
||||
Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Key, Reader, Resolver, Rodeo,
|
||||
};
|
||||
use fxhash::FxBuildHasher;
|
||||
|
||||
/// The default [`Interner`] used to deduplicate green token strings.
|
||||
#[derive(Debug)]
|
||||
pub struct TokenInterner {
|
||||
rodeo: Rodeo,
|
||||
}
|
||||
|
||||
impl TokenInterner {
|
||||
pub(super) fn new() -> Self {
|
||||
Self {
|
||||
rodeo: Rodeo::with_capacity_and_hasher(
|
||||
// capacity values suggested by author of `lasso`
|
||||
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
||||
FxBuildHasher::default(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Resolver for TokenInterner {
|
||||
#[inline]
|
||||
fn resolve<'a>(&'a self, key: &Key) -> &'a str {
|
||||
self.rodeo.resolve(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_resolve<'a>(&'a self, key: &Key) -> Option<&'a str> {
|
||||
self.rodeo.try_resolve(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn resolve_unchecked<'a>(&'a self, key: &Key) -> &'a str {
|
||||
self.rodeo.resolve_unchecked(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains_key(&self, key: &Key) -> bool {
|
||||
self.rodeo.contains_key(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
self.rodeo.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Reader for TokenInterner {
|
||||
#[inline]
|
||||
fn get(&self, val: &str) -> Option<Key> {
|
||||
self.rodeo.get(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains(&self, val: &str) -> bool {
|
||||
self.rodeo.contains(val)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResolver for TokenInterner {
|
||||
type Resolver = <Rodeo as IntoResolver>::Resolver;
|
||||
|
||||
#[inline]
|
||||
fn into_resolver(self) -> Self::Resolver
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
self.rodeo.into_resolver()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
Rodeo::into_resolver_boxed(Box::new(self.rodeo))
|
||||
}
|
||||
}
|
||||
|
||||
impl Interner for TokenInterner {
|
||||
#[inline]
|
||||
fn get_or_intern(&mut self, val: &str) -> Key {
|
||||
self.rodeo.get_or_intern(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Key> {
|
||||
self.rodeo.try_get_or_intern(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_or_intern_static(&mut self, val: &'static str) -> Key {
|
||||
self.rodeo.get_or_intern_static(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Key> {
|
||||
self.rodeo.try_get_or_intern_static(val)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoReader for TokenInterner {
|
||||
type Reader = <Rodeo as IntoReader>::Reader;
|
||||
|
||||
#[inline]
|
||||
fn into_reader(self) -> Self::Reader
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
self.rodeo.into_reader()
|
||||
}
|
||||
|
||||
fn into_reader_boxed(self: Box<Self>) -> Self::Reader
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
Rodeo::into_reader_boxed(Box::new(self.rodeo))
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoReaderAndResolver for TokenInterner {}
|
|
@ -4,7 +4,7 @@ use std::{iter::FusedIterator, slice};
|
|||
|
||||
use super::{element::PackedGreenElement, GreenElementRef};
|
||||
|
||||
/// An iterator over a [`GreenNode`](crate::GreenNode)'s children.
|
||||
/// An iterator over a [`GreenNode`](crate::green::GreenNode)'s children.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GreenNodeChildren<'a> {
|
||||
pub(super) inner: slice::Iter<'a, PackedGreenElement>,
|
||||
|
|
|
@ -6,15 +6,16 @@ use std::{
|
|||
use fxhash::FxHasher32;
|
||||
|
||||
use crate::{
|
||||
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement, SyntaxKind},
|
||||
TextSize,
|
||||
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement},
|
||||
text::TextSize,
|
||||
RawSyntaxKind,
|
||||
};
|
||||
use triomphe::{Arc, HeaderWithLength, ThinArc};
|
||||
|
||||
#[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub(super) struct GreenNodeHead {
|
||||
pub(super) kind: SyntaxKind,
|
||||
pub(super) kind: RawSyntaxKind,
|
||||
pub(super) text_len: TextSize,
|
||||
pub(super) child_hash: u32,
|
||||
}
|
||||
|
@ -35,7 +36,7 @@ impl std::fmt::Debug for GreenNode {
|
|||
impl GreenNode {
|
||||
/// Creates a new Node.
|
||||
#[inline]
|
||||
pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode
|
||||
pub fn new<I>(kind: RawSyntaxKind, children: I) -> GreenNode
|
||||
where
|
||||
I: IntoIterator<Item = GreenElement>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
|
@ -72,7 +73,7 @@ impl GreenNode {
|
|||
/// Creates a new Node.
|
||||
#[inline]
|
||||
pub(super) fn new_with_len_and_hash<I>(
|
||||
kind: SyntaxKind,
|
||||
kind: RawSyntaxKind,
|
||||
children: I,
|
||||
text_len: TextSize,
|
||||
child_hash: u32,
|
||||
|
@ -115,9 +116,9 @@ impl GreenNode {
|
|||
}
|
||||
}
|
||||
|
||||
/// [`SyntaxKind`] of this node.
|
||||
/// [`RawSyntaxKind`] of this node.
|
||||
#[inline]
|
||||
pub fn kind(&self) -> SyntaxKind {
|
||||
pub fn kind(&self) -> RawSyntaxKind {
|
||||
self.data.header.header.kind
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull};
|
||||
|
||||
use crate::{
|
||||
green::SyntaxKind,
|
||||
interning::{Key, Resolver},
|
||||
TextSize,
|
||||
interning::{Resolver, TokenKey},
|
||||
text::TextSize,
|
||||
RawSyntaxKind,
|
||||
};
|
||||
use sptr::Strict;
|
||||
use triomphe::Arc;
|
||||
|
@ -11,8 +11,8 @@ use triomphe::Arc;
|
|||
#[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
|
||||
pub(super) struct GreenTokenData {
|
||||
pub(super) kind: SyntaxKind,
|
||||
pub(super) text: Option<Key>,
|
||||
pub(super) kind: RawSyntaxKind,
|
||||
pub(super) text: Option<TokenKey>,
|
||||
pub(super) text_len: TextSize,
|
||||
}
|
||||
|
||||
|
@ -54,9 +54,9 @@ impl GreenToken {
|
|||
}
|
||||
}
|
||||
|
||||
/// [`SyntaxKind`] of this Token.
|
||||
/// [`RawSyntaxKind`] of this Token.
|
||||
#[inline]
|
||||
pub fn kind(&self) -> SyntaxKind {
|
||||
pub fn kind(&self) -> RawSyntaxKind {
|
||||
self.data().kind
|
||||
}
|
||||
|
||||
|
@ -64,9 +64,9 @@ impl GreenToken {
|
|||
#[inline]
|
||||
pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str>
|
||||
where
|
||||
I: Resolver + ?Sized,
|
||||
I: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
self.data().text.map(|key| resolver.resolve(&key))
|
||||
self.data().text.map(|key| resolver.resolve(key))
|
||||
}
|
||||
|
||||
/// Returns the length of text covered by this token.
|
||||
|
@ -80,7 +80,7 @@ impl GreenToken {
|
|||
///
|
||||
/// See also [`text`](GreenToken::text).
|
||||
#[inline]
|
||||
pub fn text_key(&self) -> Option<Key> {
|
||||
pub fn text_key(&self) -> Option<TokenKey> {
|
||||
self.data().text
|
||||
}
|
||||
}
|
||||
|
|
205
src/interning.rs
205
src/interning.rs
|
@ -1,47 +1,186 @@
|
|||
//! Types and Traits for efficient String storage and deduplication.
|
||||
//!
|
||||
//! Interning functionality is provided by the [`lasso`](lasso) crate.
|
||||
//! Because `cstree` is aimed at _concrete_ syntax trees that faithfully represent all of the original program input,
|
||||
//! `cstree` aks for the text of each token when building a syntax tree. You'll notice this when looking at
|
||||
//! [`GreenNodeBuilder::token`], which takes the kind of token and a refernce to the text of the token in the source.
|
||||
//!
|
||||
//! Of course, there are tokens whose text will always be the same, such as punctuation (like a semicolon), keywords
|
||||
//! (like `fn`), or operators (like `<=`). Use [`Language::static_text`] when implementing `Language` to make `cstree`
|
||||
//! aware of such tokens.
|
||||
//!
|
||||
//! There is, however, another category of tokens whose text will appear repeatedly, but for which we cannot know the
|
||||
//! text upfront. Any variable, type, or method that is user-defined will likely be named more than once, but there is
|
||||
//! no way to know beforehand what names a user will choose.
|
||||
//!
|
||||
//! In order to avoid storing the source text for these tokens many times over, `cstree` _interns_ the text of its
|
||||
//! tokens (if that text is not static). What this means is that each unique string is only stored once. When a new
|
||||
//! token is added - say, a variable -, we check if we already know its contents (the variable name). If the text is
|
||||
//! new, we save it and give it a unique Id. If we have seen the text before, we look up its unique Id and don't need to
|
||||
//! keep the new data around. As an additional benefit, interning also makes it much cheaper to copy source text around
|
||||
//! and also to compare it with other source text, since what is actually being copied or compared is just an integer.
|
||||
//!
|
||||
//! ## I just want to build a syntax tree
|
||||
//!
|
||||
//! If you don't want to worry about this for now, you (mostly) can! All required functionality is implemented in
|
||||
//! `cstree` and you can just use [`GreenNodeBuilder::new`] to obtain a tree builder with everything set up (see the
|
||||
//! [crate documentation] for more on how to get started). This will create an interner, which the builder returns
|
||||
//! together with the syntax tree on [`finish`] as part of its node cache (call [`NodeCache::into_interner`] on the
|
||||
//! result to get the interner out).
|
||||
//!
|
||||
//! Here begins the part where you do have to think about interning: `cstree` needs the interner you get when you want
|
||||
//! to look at the source text for some part of the syntax tree, so you'll have to keep it around somehow until the
|
||||
//! point where you need it.
|
||||
//!
|
||||
//! How best to do this depends on what you need the text for. If the code that accesses the text is close-by, it might
|
||||
//! be enough to pass the return value to the functions that need it (within `cstree` or in your code). Other options
|
||||
//! could be to store the interner together with the syntax tree. If you use [`SyntaxNode::new_root_with_resolver`], you
|
||||
//! get a syntax tree that can handle text without any need to manage and pass an interner (the reason the method is
|
||||
//! called `_with_resolver` and not `_with_interner` is that it doesn't actually need a full [`Interner`] -- once the
|
||||
//! tree is created, no more text will be added, so it just needs to be able to look up text. This part is called a
|
||||
//! [`Resolver`]). Or you could put the interner somewhere "global", where you can easily access it from anywhere.
|
||||
//!
|
||||
//! ## Using other interners
|
||||
//!
|
||||
//! By default, `cstree` uses its own, simple interner implementation. You can obtain an interner by calling
|
||||
//! [`new_interner`], or bring your own by implementing the [`Resolver`] and [`Interner`] traits defined in this module.
|
||||
//! Most methods in `cstree` require that you support interning [`TokenKey`]s. `TokenKey` implements [`InternKey`], so
|
||||
//! your implementation can use that to convert to whatever types it uses for its internal representation. Note that
|
||||
//! there is no way to change the size of the internal representation.
|
||||
//!
|
||||
//! ### `lasso`
|
||||
//! Using features, you can enable support for some third-party interners. The primary one is [`lasso`], a crate focused
|
||||
//! on efficient interning of text strings. This is enabled via the `lasso_compat` feature and adds the necessary trait
|
||||
//! implementation to make `lasso`'s interners work with `cstree` (as well as a re-export of the matching version of
|
||||
//! `lasso` here). If enabled, `cstree`'s built-in interning functionality is replaced with `lasso`'s more efficient one
|
||||
//! transparently, so you'll now be returned a `lasso` interner from [`new_interner`].
|
||||
//!
|
||||
//! ### `salsa`
|
||||
//! If you are using the "2022" version of the `salsa` incremental query framework, it is possible to use its interning
|
||||
//! capabilities with `cstree` as well. Support for this is experimental, and you have to opt in via the
|
||||
//! `salsa_2022_compat` feature. For instructions on how to do this, and whether you actually want to, please refer to
|
||||
//! [the `salsa_compat` module documentation].
|
||||
//!
|
||||
//! ## Multi-threaded interners
|
||||
//! If you want to use your interner on more than one thread, the interner needs to support interning new text through
|
||||
//! shared access. With the `multi_threaded_interning` feature, you can get such an interner by calling
|
||||
//! [`new_threaded_interner`]. The feature also enables support for `ThreadedRodeo`, the multi-threaded interner from
|
||||
//! `lasso`.
|
||||
//!
|
||||
//! **You can pass a reference to that interner to anything that expects an [`Interner`]!**
|
||||
//! While the interning methods on [`Interner`] require a `&mut self` to also work for single-threaded interners, both
|
||||
//! [`Resolver`] and [`Interner`] will be implemented for `&interner` if `interner` is multi-threaded:
|
||||
//!
|
||||
//! ```
|
||||
//! # use cstree::testing::{*, Language as _};
|
||||
//! # use cstree::interning::*;
|
||||
//!
|
||||
//! let interner = new_threaded_interner();
|
||||
//! let mut builder: GreenNodeBuilder<MyLanguage, &MultiThreadedTokenInterner> =
|
||||
//! GreenNodeBuilder::from_interner(&interner);
|
||||
//!
|
||||
//! # builder.start_node(Root);
|
||||
//! # builder.token(Int, "42");
|
||||
//! # builder.finish_node();
|
||||
//! parse(&mut builder, "42");
|
||||
//! let (tree, cache) = builder.finish();
|
||||
//!
|
||||
//! // Note that we get a cache and interner back, because we passed an "owned" reference to `from_interner`
|
||||
//! let used_interner = cache.unwrap().into_interner().unwrap();
|
||||
//! assert_eq!(used_interner as *const _, &interner as *const _);
|
||||
//!
|
||||
//! let int = tree.children().next().unwrap();
|
||||
//! assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
|
||||
//! ```
|
||||
//!
|
||||
//! Here, we use `from_interner`, but pass it only a shared reference to "own". Take care to denote the type signature
|
||||
//! of the `GreenNodeBuilder` appropriately.
|
||||
//!
|
||||
//! [crate documentation]: crate
|
||||
//! [`Language::static_text`]: crate::Language::static_text
|
||||
//! [`GreenNodeBuilder::token`]: crate::build::GreenNodeBuilder::token
|
||||
//! [`GreenNodeBuilder::new`]: crate::build::GreenNodeBuilder::new
|
||||
//! [`finish`]: crate::build::GreenNodeBuilder::finish
|
||||
//! [`NodeCache::into_interner`]: crate::build::NodeCache::into_interner
|
||||
//! [`SyntaxNode::new_root_with_resolver`]: crate::syntax::SyntaxNode::new_root_with_resolver
|
||||
//! [`lasso`]: lasso
|
||||
//! [the `salsa_compat` module documentation]: salsa_compat
|
||||
|
||||
pub use fxhash::FxBuildHasher as Hasher;
|
||||
mod traits;
|
||||
pub use self::traits::*;
|
||||
|
||||
pub use crate::green::TokenInterner;
|
||||
mod default_interner;
|
||||
|
||||
/// The index type for all interners. Each key represents
|
||||
pub type Key = lasso::Spur;
|
||||
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
|
||||
#[cfg(not(feature = "lasso_compat"))]
|
||||
#[doc(inline)]
|
||||
pub use default_interner::TokenInterner;
|
||||
|
||||
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
|
||||
/// it with `O(1)` times. By default, `Rodeo` uses an [`fxhash`] [`Hasher`].
|
||||
pub type Rodeo<S = Hasher> = lasso::Rodeo<Key, S>;
|
||||
#[cfg(feature = "lasso_compat")]
|
||||
mod lasso_compat;
|
||||
|
||||
/// Constructs a new, single-threaded interner.
|
||||
#[cfg(feature = "lasso_compat")]
|
||||
#[doc(inline)]
|
||||
pub use lasso_compat::TokenInterner;
|
||||
|
||||
#[cfg(feature = "multi_threaded_interning")]
|
||||
#[doc(inline)]
|
||||
pub use lasso_compat::MultiThreadedTokenInterner;
|
||||
|
||||
#[cfg(feature = "lasso_compat")]
|
||||
#[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
|
||||
pub use lasso;
|
||||
|
||||
#[cfg(feature = "salsa_2022_compat")]
|
||||
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
|
||||
pub mod salsa_compat;
|
||||
|
||||
use core::fmt;
|
||||
use std::num::NonZeroU32;
|
||||
|
||||
/// The intern key type for the source text of [`GreenToken`s](crate::green::GreenToken).
|
||||
/// Each unique key uniquely identifies a deduplicated, interned source string.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
|
||||
#[repr(transparent)]
|
||||
pub struct TokenKey {
|
||||
inner: NonZeroU32,
|
||||
}
|
||||
|
||||
// Safety: we match `+ 1` and `- 1`, so it is always possible to round-trip.
|
||||
unsafe impl InternKey for TokenKey {
|
||||
#[inline]
|
||||
fn into_u32(self) -> u32 {
|
||||
self.inner.get() - 1
|
||||
}
|
||||
|
||||
fn try_from_u32(key: u32) -> Option<Self> {
|
||||
(key < u32::MAX).then(|| Self {
|
||||
// Safety: non-zero by increment.
|
||||
// Overflow is impossible under the check above.
|
||||
inner: unsafe { NonZeroU32::new_unchecked(key + 1) },
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for TokenKey {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_fmt(format_args!("TokenKey({})", self.inner))
|
||||
}
|
||||
}
|
||||
|
||||
/// Constructs a new, single-threaded [`Interner`](traits::Interner).
|
||||
///
|
||||
/// If you need the interner to be multi-threaded, see [`new_threaded_interner`].
|
||||
#[inline]
|
||||
pub fn new_interner() -> Rodeo {
|
||||
Rodeo::with_hasher(Hasher::default())
|
||||
pub fn new_interner() -> TokenInterner {
|
||||
TokenInterner::new()
|
||||
}
|
||||
|
||||
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
|
||||
/// it with `O(1)` times. By default, `ThreadedRodeo` uses an [`fxhash`] [`Hasher`].
|
||||
pub type ThreadedRodeo<S = Hasher> = lasso::ThreadedRodeo<Key, S>;
|
||||
|
||||
/// Constructs a new [`Interner`](traits::Interner) that can be used across multiple threads.
///
/// Note that you can use `&MultiThreadedTokenInterner` to access interning methods through a shared reference, as
/// well as construct new syntax trees. See [the module documentation](self) for more information and examples.
#[cfg(feature = "multi_threaded_interning")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
#[inline]
pub fn new_threaded_interner() -> MultiThreadedTokenInterner {
    MultiThreadedTokenInterner::new()
}
|
||||
|
||||
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings, both
|
||||
/// key to string resolution and string to key lookups.
|
||||
///
|
||||
/// The hasher is the same as the Rodeo or ThreadedRodeo that created it.
|
||||
/// Can be acquired with the `into_reader` methods (see also [`IntoReader`]).
|
||||
pub type RodeoReader<S = Hasher> = lasso::RodeoReader<Key, S>;
|
||||
|
||||
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings with
|
||||
/// only key to string resolution.
|
||||
///
|
||||
/// Can be acquired with the `into_resolver` methods (see also [`IntoResolver`]).
|
||||
pub type RodeoResolver = lasso::RodeoResolver<Key>;
|
||||
pub use lasso::{Capacity, Iter, LassoError, LassoErrorKind, LassoResult, MemoryLimits, Strings};
|
||||
|
|
70
src/interning/default_interner.rs
Normal file
70
src/interning/default_interner.rs
Normal file
|
@ -0,0 +1,70 @@
|
|||
#![cfg(not(feature = "lasso_compat"))]
|
||||
|
||||
use core::fmt;
|
||||
|
||||
use fxhash::FxBuildHasher as Hasher;
|
||||
use indexmap::IndexSet;
|
||||
|
||||
use super::{InternKey, Interner, Resolver, TokenKey};
|
||||
|
||||
/// The default [`Interner`] used to deduplicate green token strings.
|
||||
#[derive(Debug)]
|
||||
pub struct TokenInterner {
|
||||
id_set: IndexSet<String, Hasher>,
|
||||
}
|
||||
|
||||
impl TokenInterner {
|
||||
pub(in crate::interning) fn new() -> Self {
|
||||
Self {
|
||||
id_set: IndexSet::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors that the default [`TokenInterner`] can produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InternerError {
    /// The interner ran out of unique keys to hand out.
    KeySpaceExhausted,
}

impl fmt::Display for InternerError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::KeySpaceExhausted => f.write_str("key space exhausted"),
        }
    }
}

impl std::error::Error for InternerError {}
|
||||
|
||||
impl Resolver<TokenKey> for TokenInterner {
    fn try_resolve(&self, key: TokenKey) -> Option<&str> {
        // `into_u32` undoes the key's `+ 1` encoding, yielding the set index directly.
        let index = key.into_u32() as usize;
        self.id_set.get_index(index).map(|text| text.as_str())
    }
}
|
||||
|
||||
// `TokenKey` can represent `1` to `u32::MAX` (due to the `NonNull` niche), so `u32::MAX` elements.
|
||||
// Set indices start at 0, so everything shifts down by 1.
|
||||
const N_INDICES: usize = u32::MAX as usize;
|
||||
|
||||
impl Interner<TokenKey> for TokenInterner {
|
||||
type Error = InternerError;
|
||||
|
||||
fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
|
||||
if let Some(index) = self.id_set.get_index_of(text) {
|
||||
let raw_key = u32::try_from(index).unwrap_or_else(|_| {
|
||||
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
|
||||
});
|
||||
return Ok(TokenKey::try_from_u32(raw_key).unwrap_or_else(|| {
|
||||
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
|
||||
}));
|
||||
} else if self.id_set.len() >= N_INDICES {
|
||||
return Err(InternerError::KeySpaceExhausted);
|
||||
}
|
||||
|
||||
let (index, added) = self.id_set.insert_full(text.to_string());
|
||||
debug_assert!(added, "tried to intern duplicate text");
|
||||
let raw_key = u32::try_from(index).unwrap_or_else(|_| panic!("interned `{index}` despite keyspace exhaustion"));
|
||||
TokenKey::try_from_u32(raw_key).ok_or(InternerError::KeySpaceExhausted)
|
||||
}
|
||||
}
|
9
src/interning/lasso_compat.rs
Normal file
9
src/interning/lasso_compat.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
//! Bridge between `cstree`'s and `lasso`'s types and traits.
|
||||
|
||||
#![cfg(feature = "lasso_compat")]
|
||||
|
||||
mod token_interner;
|
||||
#[doc(inline)]
|
||||
pub use token_interner::*;
|
||||
|
||||
mod traits;
|
109
src/interning/lasso_compat/token_interner.rs
Normal file
109
src/interning/lasso_compat/token_interner.rs
Normal file
|
@ -0,0 +1,109 @@
|
|||
//! Default interner implementations based on `lasso`.
|
||||
|
||||
#![cfg(feature = "lasso_compat")]
|
||||
|
||||
use std::{hash::BuildHasher, num::NonZeroUsize};
|
||||
|
||||
use fxhash::FxBuildHasher as Hasher;
|
||||
use lasso::{Capacity, Rodeo, ThreadedRodeo};
|
||||
|
||||
use crate::interning::{Interner, Resolver, TokenKey};
|
||||
|
||||
/// Default number of strings that the interner will initially allocate space for.
/// Value recommended by the author of `lasso`.
const DEFAULT_STRING_CAPACITY: usize = 512;

/// Default memory in bytes that the interner will initially allocate space for.
/// Value recommended by the author of `lasso`.
// Constructed via `match` so no `unsafe { new_unchecked }` is needed: panicking in
// `const` contexts makes the failure (impossible here, 4096 != 0) a compile-time error.
const DEFAULT_BYTE_CAPACITY: NonZeroUsize = match NonZeroUsize::new(4096) {
    Some(capacity) => capacity,
    None => unreachable!(),
};
|
||||
|
||||
/// Implements `Resolver<TokenKey>` and `Interner<TokenKey>` for an interner type that wraps a
/// `lasso` interner in a field named `rodeo`, by forwarding every method to that field.
///
/// The optional `if #[cfg(feature = ...)]` clause only adds the rustdoc `doc(cfg(...))`
/// annotation; it does not conditionally compile the impls (the caller's module is expected
/// to be `cfg`-gated separately).
macro_rules! impl_traits {
    (for $interner:ty $(, if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Resolver<TokenKey> for $interner {
            #[inline]
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                self.rodeo.try_resolve(&key)
            }

            #[inline]
            fn resolve(&self, key: TokenKey) -> &str {
                self.rodeo.resolve(&key)
            }
        }

        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Interner<TokenKey> for $interner {
            // Interning can only fail inside the backing rodeo, so its error type is
            // forwarded directly.
            type Error = lasso::LassoError;

            #[inline]
            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                self.rodeo.try_get_or_intern(text)
            }

            #[inline]
            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                self.rodeo.get_or_intern(text)
            }
        }
    };
}
|
||||
|
||||
/// The default [`Interner`] used to deduplicate green token strings.
|
||||
#[derive(Debug)]
|
||||
pub struct TokenInterner {
|
||||
rodeo: Rodeo<TokenKey, Hasher>,
|
||||
}
|
||||
|
||||
impl TokenInterner {
|
||||
pub(in crate::interning) fn new() -> Self {
|
||||
Self {
|
||||
rodeo: Rodeo::with_capacity_and_hasher(
|
||||
Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
|
||||
Hasher::default(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [`Rodeo`] backing this interner.
|
||||
#[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
|
||||
#[inline]
|
||||
pub fn into_inner(self) -> Rodeo<TokenKey, impl BuildHasher> {
|
||||
self.rodeo
|
||||
}
|
||||
}
|
||||
|
||||
impl_traits!(for TokenInterner);
|
||||
|
||||
#[cfg(feature = "multi_threaded_interning")]
pub use multi_threaded::MultiThreadedTokenInterner;

#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    /// A threadsafe [`Interner`] for deduplicating [`GreenToken`](crate::green::GreenToken) strings.
    ///
    /// Note that [`Interner`] and [`Resolver`] are also implemented for `&MultiThreadedTokenInterner` so you can
    /// pass `&mut &interner` in shared contexts.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    #[derive(Debug)]
    pub struct MultiThreadedTokenInterner {
        // Thread-safe `lasso` interner backing this type; the `Interner`/`Resolver`
        // impls below forward to it.
        rodeo: ThreadedRodeo<TokenKey, Hasher>,
    }

    impl MultiThreadedTokenInterner {
        // Only constructible from within the `interning` module (via `new_threaded_interner`).
        pub(in crate::interning) fn new() -> Self {
            Self {
                rodeo: ThreadedRodeo::with_capacity_and_hasher(
                    Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
                    Hasher::default(),
                ),
            }
        }
    }

    // Implement the interning traits both for the interner itself and for shared
    // references to it, so `&MultiThreadedTokenInterner` can be used wherever an
    // `Interner` is expected.
    impl_traits!(for MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);

    impl_traits!(for &MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);
}
|
166
src/interning/lasso_compat/traits.rs
Normal file
166
src/interning/lasso_compat/traits.rs
Normal file
|
@ -0,0 +1,166 @@
|
|||
#![cfg(feature = "lasso_compat")]
|
||||
|
||||
use core::fmt;
|
||||
use std::hash::{BuildHasher, Hash};
|
||||
|
||||
use crate::interning::{
|
||||
traits::{InternKey, Interner, Resolver},
|
||||
TokenKey,
|
||||
};
|
||||
|
||||
// Safety: `InternKey` has the same invariant as `lasso::Key`
|
||||
unsafe impl lasso::Key for TokenKey {
|
||||
fn into_usize(self) -> usize {
|
||||
self.into_u32() as usize
|
||||
}
|
||||
|
||||
fn try_from_usize(int: usize) -> Option<Self> {
|
||||
let raw_key = u32::try_from(int).ok()?;
|
||||
Self::try_from_u32(raw_key)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum LassoCompatError {
|
||||
LassoError(lasso::LassoError),
|
||||
KeyConversionError { lasso_key: usize },
|
||||
}
|
||||
|
||||
impl From<lasso::LassoError> for LassoCompatError {
|
||||
#[inline]
|
||||
fn from(error: lasso::LassoError) -> Self {
|
||||
Self::LassoError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for LassoCompatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
LassoCompatError::LassoError(lasso_error) => write!(f, "{lasso_error}"),
|
||||
LassoCompatError::KeyConversionError { lasso_key } => write!(
|
||||
f,
|
||||
"invalid key: failed to convert `lasso::Key` `{lasso_key}` to `InternKey`"
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LassoCompatError {}
|
||||
|
||||
/// Implements `Resolver<TokenKey>` for a `lasso` resolver type by converting the `TokenKey`
/// into the resolver's own `lasso::Key` type and forwarding the lookup.
///
/// The optional `if #[cfg(feature = ...)]` clause only adds the rustdoc `doc(cfg(...))`
/// annotation; it does not gate compilation.
macro_rules! compat_resolver {
    ($resolver:ident<K$(, $hasher:ident)?> $(where $($t:ident : $bound:ident),+)? $(if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl<K$(, $hasher)?> Resolver<TokenKey> for lasso::$resolver<K$(, $hasher)?>
        where
            K: lasso::Key,
            $($($t: $bound),+)?
        {
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                let raw_key = TokenKey::into_u32(key);
                let lasso_key = K::try_from_usize(raw_key as usize)?;
                <Self as lasso::Resolver<K>>::try_resolve(self, &lasso_key)
            }

            fn resolve(&self, key: TokenKey) -> &str {
                let raw_key = TokenKey::into_u32(key);
                // `unwrap_or_else` + `panic!` instead of `expect(&format!(..))`: the panic
                // message is now only built on the error path instead of allocating a
                // `String` on every successful resolution (clippy: `expect_fun_call`).
                let lasso_key = K::try_from_usize(raw_key as usize).unwrap_or_else(|| {
                    panic!("invalid key: failed to convert `{key:?}` to `lasso::Key`")
                });
                <Self as lasso::Resolver<K>>::resolve(self, &lasso_key)
            }
        }
    };
}
|
||||
|
||||
/// Implements `Interner<TokenKey>` for a `lasso` interner type by forwarding the interning
/// call and converting the resulting `lasso::Key` back into a `TokenKey`.
///
/// Unlike `compat_resolver!`, the `if #[cfg(feature = ...)]` clause is mandatory here; it
/// still only adds the rustdoc `doc(cfg(...))` annotation and does not gate compilation.
macro_rules! compat_interner {
    ($interner:ident<K, S> $(where $($t:ident : $bound:ident),+)? if #[cfg(feature = $feature:literal)]) => {
        #[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))]
        impl<K, S> Interner<TokenKey> for lasso::$interner<K, S>
        where
            K: lasso::Key,
            S: BuildHasher,
            $($($t: $bound),+)?
        {
            type Error = LassoCompatError;

            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                // `?` converts `lasso::LassoError` via the `From` impl on `LassoCompatError`.
                let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
                let raw_key = K::into_usize(lasso_key);
                // Map the key back into `TokenKey`'s `u32` keyspace, reporting the raw
                // `lasso` key if it does not fit.
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
            }

            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
                let raw_key = K::into_usize(lasso_key);
                // Infallible variant: a failed key conversion is a programming error here,
                // so it panics instead of returning an error.
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                    .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
            }
        }
    };
}
|
||||
|
||||
// Read-only `lasso` views can only resolve keys back to text.
compat_resolver!(RodeoReader<K, S> if #[cfg(feature = "lasso_compat")]);
compat_resolver!(RodeoResolver<K> if #[cfg(feature = "lasso_compat")]);

// `Rodeo` is a full interner: it both resolves existing keys and interns new text.
compat_resolver!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
compat_interner!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
|
||||
|
||||
#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    // `ThreadedRodeo` needs the extra `Hash`/`Clone` bounds that `lasso` places on it.
    compat_resolver!(ThreadedRodeo<K, S> where K: Hash, S: BuildHasher, S: Clone if #[cfg(feature = "multi_threaded_interning")]);

    compat_interner!(ThreadedRodeo<K, S> where K: Hash, S: Clone if #[cfg(feature = "multi_threaded_interning")]);

    // Forwarding impls for `&ThreadedRodeo`, so a shared reference can be used as a
    // `Resolver`/`Interner` (interning through `&self` is supported by the backing rodeo;
    // see `<Self as lasso::Interner<K>>` below, where `Self` is the reference type).
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Resolver<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        #[inline]
        fn try_resolve(&self, key: TokenKey) -> Option<&str> {
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::try_resolve(self, key)
        }

        #[inline]
        fn resolve(&self, key: TokenKey) -> &str {
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::resolve(self, key)
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Interner<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        // Reuse the error type of the by-value impl generated by `compat_interner!` above.
        type Error = <lasso::ThreadedRodeo<K, S> as Interner<TokenKey>>::Error;

        fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
            // `?` converts `lasso::LassoError` via `From` on `LassoCompatError`.
            let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
            let raw_key = K::into_usize(lasso_key);
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
        }

        fn get_or_intern(&mut self, text: &str) -> TokenKey {
            let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
            let raw_key = K::into_usize(lasso_key);
            // Infallible variant: a failed key conversion is a programming error, so panic.
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
        }
    }
}
|
228
src/interning/salsa_compat.rs
Normal file
228
src/interning/salsa_compat.rs
Normal file
|
@ -0,0 +1,228 @@
|
|||
//! # Using a `salsa` database as the interner for `cstree`
|
||||
//!
|
||||
//! <p
|
||||
//! style="background:rgba(255,181,77,0.16);padding:0.75em;white-space:normal;font:inherit;">
|
||||
//! <strong>Warning</strong>: Compatibility is only provided for "Salsa 2022".
|
||||
//! This version is currently under active development and <code style="background:rgba(41,24,0,0.9);">cstree</code>'s
|
||||
//! compatibility features are unstable until there is an official
|
||||
//! release.
|
||||
//! Older versions of `salsa` are not supported.
|
||||
//! </p>
|
||||
//!
|
||||
//! If you are using the `salsa` query system, you already have access to an implementation of interning through
|
||||
//! [`#[salsa::interned]`](macro@salsa::interned). This is all that is needed to use `cstree` and this module provides
|
||||
//! the utilities needed to use `salsa`'s interners for working with syntax trees.
|
||||
//!
|
||||
//! Note that the primary benefit of this is that it avoids additional dependencies because it uses an interner that you
|
||||
//! already depend on, but it can also be beneficial to use an interner that is more specialized towards string
|
||||
//! interning. In particular, using `salsa`'s interning requires allocating all strings that are interned even if they
|
||||
//! are deduplicated because they already exist in the interner.
|
||||
//!
|
||||
//! ## How to do it
|
||||
//!
|
||||
//! ```
|
||||
//! # use cstree::testing::*;
|
||||
//! # use cstree::interning::salsa_compat::salsa;
|
||||
//! # use cstree::impl_cstree_interning_for_salsa;
|
||||
//! // Define the `salsa` jar, database and intern Id
|
||||
//! #[salsa::jar(db = Db)]
|
||||
//! pub struct Jar(SourceId);
|
||||
//!
|
||||
//! pub trait Db: salsa::DbWithJar<Jar> {}
|
||||
//! impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
||||
//!
|
||||
//! // If you are not a doctest and can put `Jar` at the root of your crate,
|
||||
//! // this can just be `#[salsa::interned]`.
|
||||
//! #[salsa::interned(jar = Jar)]
|
||||
//! pub struct SourceId {
|
||||
//! #[return_ref]
|
||||
//! pub text: String,
|
||||
//! }
|
||||
//!
|
||||
//! #[derive(Default)]
|
||||
//! #[salsa::db(Jar)]
|
||||
//! struct Database {
|
||||
//! storage: salsa::Storage<Self>,
|
||||
//! }
|
||||
//! impl salsa::Database for Database {}
|
||||
//!
|
||||
//! // Let `cstree` define a conversion trait and implement it for your database.
|
||||
//! // `Database` is your db type, `SourceId` is your interning id, and `text` is
|
||||
//! // its text field (all as defined above).
|
||||
//! impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
|
||||
//!
|
||||
//! // Build a tree with the `salsa` interner
|
||||
//! let db = Database::default();
|
||||
//! let interner = db.as_interner(); // <-- conversion happens here
|
||||
//! let mut shared_interner = &interner;
|
||||
//! let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
|
||||
//! let (tree, _no_interner_because_it_was_borrowed) = {
|
||||
//! builder.start_node(TestSyntaxKind::Plus);
|
||||
//! builder.token(TestSyntaxKind::Float, "2.05");
|
||||
//! builder.token(TestSyntaxKind::Whitespace, " ");
|
||||
//! builder.token(TestSyntaxKind::Plus, "+");
|
||||
//! builder.token(TestSyntaxKind::Whitespace, " ");
|
||||
//! builder.token(TestSyntaxKind::Float, "7.32");
|
||||
//! builder.finish_node();
|
||||
//! builder.finish()
|
||||
//! };
|
||||
//! let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
|
||||
//! assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
|
||||
//! ```
|
||||
//!
|
||||
//! The full code is also available in the `salsa` example.
|
||||
//!
|
||||
//! ## Working with `InternWithDb` directly
|
||||
//! If you don't want the trait, or macros, or if you just need more control about what happens during interning and
|
||||
//! resolution, you can skip using [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa) and use
|
||||
//! [`InternWithDb`] directly.
|
||||
//!
|
||||
//! Because `salsa` generates inherent methods (and not, for example, a trait implementation), we need information about
|
||||
//! the used interning id either way. All that `as_interner` does is construct an instance of `InternWithDb` that uses
|
||||
//! the generated methods to invoke `salsa`s interner. The implementation expands to
|
||||
//! ```text
|
||||
//! InternWithDb::new(
|
||||
//! db,
|
||||
//! |db, text| SourceId::new(db, text),
|
||||
//! |db, id| id.text(db),
|
||||
//! )
|
||||
//! ```
|
||||
//! but you may provide any function that doesn't capture.
|
||||
|
||||
#![cfg(feature = "salsa_2022_compat")]
|
||||
|
||||
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
|
||||
pub use salsa;
|
||||
|
||||
use core::fmt;
|
||||
|
||||
use super::{InternKey, Interner, Resolver, TokenKey};
|
||||
|
||||
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
impl salsa::AsId for TokenKey {
    // `TokenKey::into_u32` yields the raw (unshifted) key, which maps directly onto
    // `salsa::Id`'s `u32` representation.
    fn as_id(self) -> salsa::Id {
        salsa::Id::from_u32(self.into_u32())
    }

    /// Create an instance of the intern-key from an ID.
    ///
    /// # Panics
    /// Panics if the given `id` from `salsa` cannot be represented by a [`TokenKey`].
    fn from_id(id: salsa::Id) -> Self {
        TokenKey::try_from_u32(id.as_u32())
            .unwrap_or_else(|| panic!("`salsa::Id` is invalid for `TokenKey`'s keyspace: {id:?}"))
    }
}
|
||||
|
||||
/// Generates an extension trait `SalsaAsInterner` that lets you call `db.as_interner()` on your [`salsa::Database`] to
/// obtain a `cstree` compatible [`Interner`].
///
/// The `as_interner` method returns an instance of [`InternWithDb`] that uses the functions generated by `salsa` for
/// your Id type to perform interning and resolution.
///
/// If you have defined your interned text as
/// ```ignore
/// #[salsa::interned]
/// pub struct SourceId {
///     #[return_ref]
///     pub text: String,
/// }
/// ```
/// the syntax is
/// ```ignore
/// impl_cstree_interning_for_salsa!(impl Interning for YourDatabase => text as SourceId);
/// ```
/// where `text` is the name of the interned field.
/// Note that the use of `#[return_ref]` is required.
#[macro_export]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
macro_rules! impl_cstree_interning_for_salsa {
    (impl Interning for $db:ty => $name:ident as $id:ty) => {
        trait SalsaAsInterner {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id>;
        }

        // Implement the trait for the caller-supplied database type. This previously
        // hard-coded the name `Database`, which only compiled when the caller's type
        // happened to be called exactly that.
        impl SalsaAsInterner for $db {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id> {
                ::cstree::interning::salsa_compat::InternWithDb::new(
                    self,
                    |db, text| <$id>::new(db, text),
                    |db, id| id.$name(db),
                )
            }
        }
    };
}
|
||||
|
||||
/// This type allows you to wrap access to a [`salsa::Database`] together with an interning and a lookup function, which
/// makes it implement [`Interner`] and [`Resolver`]. The [module documentation](self) shows how to use this with your
/// own database, or you can use [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa).
///
/// The interning traits are also implemented by `&InternWithDb`, as the `salsa` database supports interning through
/// shared references (see also [the `interning` module documentation](super)).
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub struct InternWithDb<'db, Db: salsa::Database, Id: salsa::interned::InternedId> {
    // The `salsa` database that owns the interned strings.
    db: &'db Db,
    // Interns `text` in `db`, producing the user's `salsa` intern Id.
    intern: fn(&Db, text: String) -> Id,
    // Resolves an Id back to its interned text; the `&str` borrows from the database.
    lookup: fn(&Db, Id) -> &str,
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> fmt::Debug for InternWithDb<'db, Db, Id> {
    // Manual impl: `Db` carries no `Debug` bound and the function pointers are
    // uninformative, so only the type name is printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("InternWithDb")
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> InternWithDb<'db, Db, Id> {
    /// Create an [`Interner`] that works with `cstree` but uses the given `db` from `salsa`.
    /// To do this, you need to provide a function for interning new strings that creates the [`InternedId`] that you
    /// defined with [`#[salsa::interned]`](macro@salsa::interned), and a second one that resolves an Id using your
    /// database. See the [module documentation](self) for an example.
    ///
    /// [`InternedId`]: salsa::interned::InternedId
    pub fn new(db: &'db Db, intern: fn(&Db, text: String) -> Id, lookup: fn(&Db, Id) -> &str) -> Self {
        Self { db, intern, lookup }
    }
}
|
||||
|
||||
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for InternWithDb<'db, Db, Id> {
|
||||
fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
|
||||
use salsa::AsId;
|
||||
|
||||
let key = Id::from_id(key.as_id());
|
||||
let text = (self.lookup)(self.db, key);
|
||||
Some(text)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for InternWithDb<'db, Db, Id> {
|
||||
type Error = std::convert::Infallible;
|
||||
|
||||
fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
|
||||
use salsa::AsId;
|
||||
|
||||
let id = (self.intern)(self.db, text.to_string());
|
||||
Ok(TokenKey::from_id(id.as_id()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for &InternWithDb<'db, Db, Id> {
|
||||
fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
|
||||
use salsa::AsId;
|
||||
|
||||
let key = Id::from_id(key.as_id());
|
||||
let text = (self.lookup)(self.db, key);
|
||||
Some(text)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for &InternWithDb<'db, Db, Id> {
|
||||
type Error = std::convert::Infallible;
|
||||
|
||||
fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
|
||||
use salsa::AsId;
|
||||
|
||||
let id = (self.intern)(self.db, text.to_string());
|
||||
Ok(TokenKey::from_id(id.as_id()))
|
||||
}
|
||||
}
|
67
src/interning/traits.rs
Normal file
67
src/interning/traits.rs
Normal file
|
@ -0,0 +1,67 @@
|
|||
use core::fmt;
|
||||
|
||||
use super::TokenKey;
|
||||
|
||||
/// Common interface for all intern keys via conversion to and from `u32`.
|
||||
///
|
||||
/// # Safety
|
||||
/// Implementations must guarantee that keys can round-trip in both directions: going from `Self` to `u32` to `Self` and
|
||||
/// going from `u32` to `Self` to `u32` must each yield the original value.
|
||||
pub unsafe trait InternKey: Copy + Eq + fmt::Debug {
|
||||
/// Convert `self` into its raw representation.
|
||||
fn into_u32(self) -> u32;
|
||||
|
||||
/// Try to reconstruct an intern key from its raw representation.
|
||||
/// Returns `None` if `key` is not a valid key.
|
||||
fn try_from_u32(key: u32) -> Option<Self>;
|
||||
}
|
||||
|
||||
/// The read-only part of an interner.
|
||||
/// Allows to perform lookups of intern keys to resolve them to their interned text.
|
||||
pub trait Resolver<Key: InternKey = TokenKey> {
|
||||
/// Tries to resolve the given `key` and return its interned text.
|
||||
///
|
||||
/// If `self` does not contain any text for `key`, `None` is returned.
|
||||
fn try_resolve(&self, key: Key) -> Option<&str>;
|
||||
|
||||
/// Resolves `key` to its interned text.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if there is no text for `key`.
|
||||
///
|
||||
/// Compatibility implementations for interners from other crates may also panic if `key` cannot be converted to the
|
||||
/// key type of the external interner. Please ensure you configure any external interners appropriately (for
|
||||
/// example by choosing an appropriately sized key type).
|
||||
fn resolve(&self, key: Key) -> &str {
|
||||
self.try_resolve(key)
|
||||
.unwrap_or_else(|| panic!("failed to resolve `{key:?}`"))
|
||||
}
|
||||
}
|
||||
|
||||
/// A full interner, which can intern new strings returning intern keys and also resolve intern keys to the interned
|
||||
/// value.
|
||||
///
|
||||
/// **Note:** Because single-threaded interners may require mutable access, the methods on this trait take `&mut self`.
|
||||
/// In order to use a multi- (or single)-threaded interner that allows access through a shared reference, it is
|
||||
/// implemented for `&`[`MultiThreadedTokenInterner`](crate::interning::MultiThreadedTokenInterner), allowing it to be
|
||||
/// used with a `&mut &MultiThreadTokenInterner`.
|
||||
pub trait Interner<Key: InternKey = TokenKey>: Resolver<Key> {
|
||||
/// Represents possible ways in which interning may fail.
|
||||
/// For example, this might be running out of fresh intern keys, or failure to allocate sufficient space for a new
|
||||
/// value.
|
||||
type Error;
|
||||
|
||||
/// Interns `text` and returns a new intern key for it.
|
||||
/// If `text` was already previously interned, it will not be used and the existing intern key for its value will be
|
||||
/// returned.
|
||||
fn try_get_or_intern(&mut self, text: &str) -> Result<Key, Self::Error>;
|
||||
|
||||
/// Interns `text` and returns a new intern key for it.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the internment process raises an [`Error`](Interner::Error).
|
||||
fn get_or_intern(&mut self, text: &str) -> Key {
|
||||
self.try_get_or_intern(text)
|
||||
.unwrap_or_else(|_| panic!("failed to intern `{text:?}`"))
|
||||
}
|
||||
}
|
405
src/lib.rs
405
src/lib.rs
|
@ -2,7 +2,7 @@
|
|||
//! "Traditional" abstract syntax trees (ASTs) usually contain different types of nodes which represent information
|
||||
//! about the source text of a document and reduce this information to the minimal amount necessary to correctly
|
||||
//! interpret it. In contrast, CSTs are lossless representations of the entire input where all tree nodes are
|
||||
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`SyntaxKind`] field to determine the kind of
|
||||
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`RawSyntaxKind`] field to determine the kind of
|
||||
//! node.
|
||||
//! One of the big advantages of this representation is not only that it can recreate the original source exactly, but
|
||||
//! also that it lends itself very well to the representation of _incomplete or erroneous_ trees and is thus very suited
|
||||
|
@ -35,41 +35,385 @@
|
|||
//! references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
|
||||
//!
|
||||
//! ## Getting Started
|
||||
//! The main entry points for constructing syntax trees are [`GreenNodeBuilder`] and [`SyntaxNode::new_root`] for green
|
||||
//! and red trees respectively. See `examples/s_expressions.rs` for a guided tutorial to `cstree`.
|
||||
//! If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
|
||||
//! concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
|
||||
//! to happen to go from input text to a `cstree` syntax tree:
|
||||
//!
|
||||
//! 1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression") that you want to
|
||||
//! have in your syntax and implement [`Language`]
|
||||
//!
|
||||
//! 2. Create a [`GreenNodeBuilder`](build::GreenNodeBuilder) and call
|
||||
//! [`start_node`](build::GreenNodeBuilder::start_node), [`token`](build::GreenNodeBuilder::token) and
|
||||
//! [`finish_node`](build::GreenNodeBuilder::finish_node) from your parser
|
||||
//!
|
||||
//! 3. Call [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root) or
|
||||
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) with the resulting
|
||||
//! [`GreenNode`](green::GreenNode) to obtain a syntax tree that you can traverse
|
||||
//!
|
||||
//! Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
|
||||
//! We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
|
||||
//! compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
|
||||
//!
|
||||
//! ### Defining the language
|
||||
//!
|
||||
//! First, we need to list the different part of our language's grammar.
|
||||
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
||||
//! representation.
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
//! #[repr(u16)]
|
||||
//! enum SyntaxKind {
|
||||
//! /* Tokens */
|
||||
//! Int, // 42
|
||||
//! Plus, // +
|
||||
//! Minus, // -
|
||||
//! LParen, // (
|
||||
//! RParen, // )
|
||||
//! /* Nodes */
|
||||
//! Expr,
|
||||
//! Root,
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
|
||||
//! We only really need one type of node; expressions.
|
||||
//! Our syntax tree's root node will have the special kind `Root`, all other nodes will be
|
||||
//! expressions containing a sequence of arithmetic operations potentially involving further, nested
|
||||
//! expression nodes.
|
||||
//!
|
||||
//! To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
|
||||
//! `#[repr(u16)]` that we added) by implementing the [`Language`] trait. We can also tell `cstree` about tokens that
|
||||
//! always have the same text through the `static_text` method on the trait. This is useful for the operators and
|
||||
//! parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
|
||||
//! other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
//! pub struct Calculator;
|
||||
//! impl Language for Calculator {
|
||||
//! // The tokens and nodes we just defined
|
||||
//! type Kind = SyntaxKind;
|
||||
//!
|
||||
//! fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
//! // This just needs to be the inverse of `kind_to_raw`, but could also
|
||||
//! // be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
|
||||
//! match raw.0 {
|
||||
//! 0 => SyntaxKind::Int,
|
||||
//! 1 => SyntaxKind::Plus,
|
||||
//! 2 => SyntaxKind::Minus,
|
||||
//! 3 => SyntaxKind::LParen,
|
||||
//! 4 => SyntaxKind::RParen,
|
||||
//! 5 => SyntaxKind::Expr,
|
||||
//! 6 => SyntaxKind::Root,
|
||||
//! n => panic!("Unknown raw syntax kind: {n}"),
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
//! RawSyntaxKind(kind as u16)
|
||||
//! }
|
||||
//!
|
||||
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
//! match kind {
|
||||
//! SyntaxKind::Plus => Some("+"),
|
||||
//! SyntaxKind::Minus => Some("-"),
|
||||
//! SyntaxKind::LParen => Some("("),
|
||||
//! SyntaxKind::RParen => Some(")"),
|
||||
//! _ => None,
|
||||
//! }
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Parsing into a green tree
|
||||
//! With that out of the way, we can start writing the parser for our expressions.
|
||||
//! For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
|
||||
//! tokens:
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! #[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
//! pub enum Token<'input> {
|
||||
//! // Note that number strings are not yet parsed into actual numbers,
|
||||
//! // we just remember the slice of the input that contains their digits
|
||||
//! Int(&'input str),
|
||||
//! Plus,
|
||||
//! Minus,
|
||||
//! LParen,
|
||||
//! RParen,
|
||||
//! // A special token that indicates that we have reached the end of the file
|
||||
//! EoF,
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
|
||||
//! combination of `cstree` and the actual parser, which we define like this:
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! pub struct Parser<'input> {
|
||||
//! // `Peekable` is a standard library iterator adapter that allows
|
||||
//! // looking ahead at the next item without removing it from the iterator yet
|
||||
//! lexer: Peekable<Lexer<'input>>,
|
||||
//! builder: GreenNodeBuilder<'static, 'static, Calculator>,
|
||||
//! }
|
||||
//!
|
||||
//! impl<'input> Parser<'input> {
|
||||
//! pub fn new(input: &'input str) -> Self {
|
||||
//! Self {
|
||||
//! // we get `peekable` from implementing `Iterator` on `Lexer`
|
||||
//! lexer: Lexer::new(input).peekable(),
|
||||
//! builder: GreenNodeBuilder::new(),
|
||||
//! }
|
||||
//! }
|
||||
//!
|
||||
//! pub fn bump(&mut self) -> Option<Token<'input>> {
|
||||
//! self.lexer.next()
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes
|
||||
//! for all element in the language grammar will have the same type: [`GreenNode`](green::GreenNode)
|
||||
//! for the inner ("green") tree and [`SyntaxNode`](syntax::SyntaxNode) for the outer ("red") tree.
|
||||
//! Different kinds of nodes (and tokens) are differentiated by their `SyntaxKind` tag, which we defined above.
|
||||
//!
|
||||
//! You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
|
||||
//! a typical recursive descent parser. With a more traditional AST, one would define different AST
|
||||
//! structs for struct or function definitions, statements, expressions and so on. Inside the
|
||||
//! parser, the components of any element, such as all fields of a struct or all statements inside a
|
||||
//! function, are parsed first and then the parser wraps them in the matching AST type, which is
|
||||
//! returned from the corresponding parser function.
|
||||
//!
|
||||
//! Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the
|
||||
//! parser would build. Instead, parsing into a CST using the
|
||||
//! [`GreenNodeBuilder`](build::GreenNodeBuilder) follows the source code more closely in that you
|
||||
//! tell `cstree` about each new element you enter and all tokens that the parser consumes. So, for
|
||||
//! example, to parse a struct definition the parser first "enters" the struct definition node, then
|
||||
//! parses the `struct` keyword and type name, then parses each field, and finally "finishes"
|
||||
//! parsing the struct node.
|
||||
//!
|
||||
//! The most trivial example is the root node for our parser, which just creates a root node
|
||||
//! containing the whole expression (we could do without a specific root node if any expression was
|
||||
//! a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! pub fn parse(&mut self) -> Result<(), String> {
|
||||
//! self.builder.start_node(SyntaxKind::Root);
|
||||
//! self.parse_expr()?;
|
||||
//! self.builder.finish_node();
|
||||
//! Ok(())
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! As there isn't a static AST type to return, the parser is very flexible as to what is part of a
|
||||
//! node. In the previous example, if the user is adding a new field to the struct and has not yet
|
||||
//! typed the field's type, the CST node for the struct doesn't care if there is no child node for
|
||||
//! it. Similarly, if the user is deleting fields and the source code currently contains a leftover
|
||||
//! field name, this additional identifier can be a part of the struct node without any
|
||||
//! modifications to the syntax tree definition. This property is the key to why CSTs are such a
|
||||
//! good fit as a lossless input representation, which necessitates the syntax tree to mirror the
|
||||
//! user-specific layout of whitespace and comments around the AST items.
|
||||
//!
|
||||
//! In the parser for our simple expression language, we'll also have to deal with the fact that,
|
||||
//! when we see a number the parser doesn't yet know whether there will be additional operations
|
||||
//! following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
|
||||
//! a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
|
||||
//! however, implies that when reaching the `+`, the parser would have to have already entered an
|
||||
//! expression node in order for the whole input to be part of the expression.
|
||||
//!
|
||||
//! To get around this, `GreenNodeBuilder` provides the
|
||||
//! [`checkpoint`](build::GreenNodeBuilder::checkpoint) method, which we can call to "remember" the
|
||||
//! current position in the input. For example, we can create a checkpoint before the parser parses
|
||||
//! the first `1`. Later, when it sees the following `+`, it can create an `Expr` node for the
|
||||
//! whole expression using [`start_node_at`](build::GreenNodeBuilder::start_node_at):
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! fn parse_lhs(&mut self) -> Result<(), String> {
|
||||
//! // An expression may start either with a number, or with an opening parenthesis that is
|
||||
//! // the start of a parenthesized expression
|
||||
//! let next_token = *self.lexer.peek().unwrap();
|
||||
//! match next_token {
|
||||
//! Token::Int(n) => {
|
||||
//! self.bump();
|
||||
//! self.builder.token(SyntaxKind::Int, n);
|
||||
//! }
|
||||
//! Token::LParen => {
|
||||
//! // Wrap the grouped expression inside a node containing it and its parentheses
|
||||
//! self.builder.start_node(SyntaxKind::Expr);
|
||||
//! self.bump();
|
||||
//! self.builder.static_token(SyntaxKind::LParen);
|
||||
//! self.parse_expr()?; // Inner expression
|
||||
//! if self.bump() != Some(Token::RParen) {
|
||||
//! return Err("Missing ')'".to_string());
|
||||
//! }
|
||||
//! self.builder.static_token(SyntaxKind::RParen);
|
||||
//! self.builder.finish_node();
|
||||
//! }
|
||||
//! Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
|
||||
//! t => return Err(format!("Unexpected start of expression: '{t:?}'")),
|
||||
//! }
|
||||
//! Ok(())
|
||||
//! }
|
||||
//!
|
||||
//! fn parse_expr(&mut self) -> Result<(), String> {
|
||||
//! // Remember our current position
|
||||
//! let before_expr = self.builder.checkpoint();
|
||||
//!
|
||||
//! // Parse the start of the expression
|
||||
//! self.parse_lhs()?;
|
||||
//!
|
||||
//! // Check if the expression continues with `+ <more>` or `- <more>`
|
||||
//! let Some(next_token) = self.lexer.peek() else {
|
||||
//! return Ok(());
|
||||
//! };
|
||||
//! let op = match *next_token {
|
||||
//! Token::Plus => SyntaxKind::Plus,
|
||||
//! Token::Minus => SyntaxKind::Minus,
|
||||
//! Token::RParen | Token::EoF => return Ok(()),
|
||||
//! t => return Err(format!("Expected operator, found '{t:?}'")),
|
||||
//! };
|
||||
//!
|
||||
//! // If so, retroactively wrap the (already parsed) LHS and the following RHS
|
||||
//! // inside an `Expr` node
|
||||
//! self.builder.start_node_at(before_expr, SyntaxKind::Expr);
|
||||
//! self.bump();
|
||||
//! self.builder.static_token(op);
|
||||
//! self.parse_expr()?; // RHS
|
||||
//! self.builder.finish_node();
|
||||
//! Ok(())
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ### Obtaining the parser result
|
||||
//!
|
||||
//! Our parser is now capable of parsing our little arithmetic language, but it's methods don't
|
||||
//! return anything. So how do we get our syntax tree out? The answer lies in
|
||||
//! [`GreenNodeBuilder::finish`](build::GreenNodeBuilder::finish), which finally returns the tree
|
||||
//! that we have painstakingly constructed.
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! impl Parser<'_> {
|
||||
//! pub fn finish(mut self) -> (GreenNode, impl Interner) {
|
||||
//! assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
|
||||
//! let (tree, cache) = self.builder.finish();
|
||||
//! (tree, cache.unwrap().into_interner().unwrap())
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! `finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use
|
||||
//! it for parsing related inputs (e.g., different source files from the same crate may share a lot
|
||||
//! of common function and type names that can be deduplicated). See `GreenNodeBuilder`'s
|
||||
//! documentation for more information on this, in particular the `with_cache` and `from_cache`
|
||||
//! methods. Most importantly for us, we can extract the [`Interner`](interning::Interner) that
|
||||
//! contains the source text of the tree's tokens from the cache, which we need if we want to look
|
||||
//! up things like variable names or the value of numbers for our calculator.
|
||||
//!
|
||||
//! To work with the syntax tree, you'll want to upgrade it to a [`SyntaxNode`](syntax::SyntaxNode)
|
||||
//! using [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root). You can also use
|
||||
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) to combine
|
||||
//! tree and interner, which lets you directly retrieve source text and makes the nodes implement
|
||||
//! `Display` and `Debug`. The same output can be produced from `SyntaxNode`s by calling the
|
||||
//! `debug` or `display` method with a [`Resolver`](interning::Resolver). To visualize the whole
|
||||
//! syntax tree, pass `true` for the `recursive` parameter on `debug`, or simply debug-print a
|
||||
//! [`ResolvedNode`](syntax::ResolvedNode):
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! let input = "11 + 2-(5 + 4)";
|
||||
//! let mut parser = Parser::new(input);
|
||||
//! parser.parse().unwrap();
|
||||
//! let (tree, interner) = parser.finish();
|
||||
//! let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
|
||||
//! dbg!(root);
|
||||
//! ```
|
||||
//!
|
||||
//! ### Further examples
|
||||
//! The parser we just built is available in full in the runnable `readme` example, which includes some additional code
|
||||
//! to read expressions from the terminal and evaluate the parsed expressions - have it do a few calculations if you
|
||||
//! like.
|
||||
//! There are several more examples in the `examples/` folder in the repository.
|
||||
//! A good starting point is the `s_expressions` example, which implements a parser for a small S-Expression language
|
||||
//! with guiding comments.
|
||||
//!
|
||||
//! ## AST Layer
|
||||
//! While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or
|
||||
//! an AST representation, or freely switch between them. To do so, use `cstree` to build syntax and underlying green
|
||||
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs) and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs) (note that the latter file is automatically generated by a task).
|
||||
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs)
|
||||
//! and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs)
|
||||
//! (note that the latter file is automatically generated by a task using [`ungrammar`](https://crates.io/crates/ungrammar)).
|
||||
|
||||
#![forbid(missing_debug_implementations, unconditional_recursion)]
|
||||
#![deny(unsafe_code, missing_docs, future_incompatible)]
|
||||
#![deny(unsafe_code, future_incompatible)]
|
||||
#![allow(unstable_name_collisions)] // strict provenance - must come after `future_incompatible` to take precedence
|
||||
#![warn(missing_docs)]
|
||||
// Docs.rs
|
||||
#![doc(html_root_url = "https://docs.rs/cstree/0.12.0-rc.0")]
|
||||
#![cfg_attr(doc_cfg, feature(doc_cfg))]
|
||||
|
||||
#[allow(unsafe_code)]
|
||||
mod green;
|
||||
pub mod green;
|
||||
#[allow(unsafe_code)]
|
||||
mod syntax;
|
||||
pub mod syntax;
|
||||
|
||||
#[allow(unsafe_code)]
|
||||
pub mod interning;
|
||||
|
||||
#[cfg(feature = "serialize")]
|
||||
mod serde_impls;
|
||||
#[allow(missing_docs)]
|
||||
mod utility_types;
|
||||
|
||||
pub mod interning;
|
||||
use std::fmt;
|
||||
|
||||
// Reexport types for working with strings.
|
||||
pub use text_size::{TextLen, TextRange, TextSize};
|
||||
/// `RawSyntaxKind` is a type tag for each token or node.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct RawSyntaxKind(pub u16);
|
||||
|
||||
#[doc(inline)]
|
||||
pub use crate::syntax::*;
|
||||
pub use crate::{
|
||||
green::{Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeChildren, GreenToken, NodeCache, SyntaxKind},
|
||||
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
|
||||
};
|
||||
pub use triomphe::Arc;
|
||||
/// Typesafe representations of text ranges and sizes.
|
||||
pub mod text {
|
||||
pub use crate::syntax::SyntaxText;
|
||||
pub use text_size::{TextLen, TextRange, TextSize};
|
||||
}
|
||||
|
||||
/// A tree builder for the construction of syntax trees.
|
||||
///
|
||||
/// Please refer to the documentation on [`GreenNodeBuilder`](build::GreenNodeBuilder) itself and the ["getting started"
|
||||
/// section](../index.html#getting-started) from the top-level documentation for an introduction to how to build a
|
||||
/// syntax tree.
|
||||
pub mod build {
|
||||
pub use crate::green::builder::{Checkpoint, GreenNodeBuilder, NodeCache};
|
||||
}
|
||||
|
||||
/// A convenient collection of the most used parts of `cstree`.
|
||||
pub mod prelude {
|
||||
pub use crate::{
|
||||
build::GreenNodeBuilder,
|
||||
green::{GreenNode, GreenToken},
|
||||
syntax::{SyntaxElement, SyntaxNode, SyntaxToken},
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
}
|
||||
|
||||
/// Types for syntax tree traversal / moving through trees.
|
||||
pub mod traversal {
|
||||
pub use crate::utility_types::{Direction, WalkEvent};
|
||||
}
|
||||
|
||||
/// Utility types. It shouldn't be needed to reference these directly, but they are returned in several places in
|
||||
/// `cstree` and may come in handy.
|
||||
pub mod util {
|
||||
pub use crate::utility_types::{NodeOrToken, TokenAtOffset};
|
||||
}
|
||||
|
||||
/// Synchronization primitives.
|
||||
pub mod sync {
|
||||
/// An atomically reference counted shared pointer.
|
||||
///
|
||||
/// This is like [`Arc`](std::sync::Arc) in the standard library, but more efficient for how `cstree` stores
|
||||
/// syntax trees internally. This Arc does not support weak reference counting.
|
||||
pub use triomphe::Arc;
|
||||
}
|
||||
|
||||
/// The `Language` trait is the bridge between the internal `cstree` representation and your
|
||||
/// language's types.
|
||||
|
@ -97,13 +441,13 @@ pub use triomphe::Arc;
|
|||
/// impl cstree::Language for Lang {
|
||||
/// type Kind = SyntaxKind;
|
||||
///
|
||||
/// fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
||||
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
/// assert!(raw.0 <= __LAST as u16);
|
||||
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
/// }
|
||||
///
|
||||
/// fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
||||
/// cstree::SyntaxKind(kind as u16)
|
||||
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
/// cstree::RawSyntaxKind(kind as u16)
|
||||
/// }
|
||||
///
|
||||
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
@ -115,29 +459,34 @@ pub use triomphe::Arc;
|
|||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// [`SyntaxNode`]: crate::syntax::SyntaxNode
|
||||
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
|
||||
/// A type that represents what items in your Language can be.
|
||||
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ...
|
||||
type Kind: Sized + Clone + Copy + fmt::Debug;
|
||||
|
||||
/// Construct a semantic item kind from the compact representation.
|
||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind;
|
||||
|
||||
/// Convert a semantic item kind into a more compact representation.
|
||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind;
|
||||
|
||||
/// Fixed text for a particular syntax kind.
|
||||
///
|
||||
/// Implement for kinds that will only ever represent the same text, such as punctuation (like a
|
||||
/// semicolon), keywords (like `fn`), or operators (like `<=`).
|
||||
///
|
||||
/// Indicating tokens that have a `static_text` this way allows `cstree` to store them more efficiently, which makes
|
||||
/// it faster to add them to a syntax tree and to look up their text. Since there can often be many occurrences
|
||||
/// of these tokens inside a file, doing so will improve the performance of using `cstree`.
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str>;
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[allow(unsafe_code, unused)]
|
||||
pub mod testing {
|
||||
pub use crate::*;
|
||||
pub fn parse<L: Language, I>(_b: &mut super::GreenNodeBuilder<L, I>, _s: &str) {}
|
||||
pub use crate::prelude::*;
|
||||
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
|
@ -160,13 +509,13 @@ pub mod testing {
|
|||
impl Language for TestLang {
|
||||
type Kind = TestSyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
|
||||
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
||||
SyntaxKind(kind as u16)
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
RawSyntaxKind(kind as u16)
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
//! Serialization and Deserialization for syntax trees.
|
||||
|
||||
use crate::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder, Language, NodeOrToken, ResolvedNode, SyntaxKind, SyntaxNode, WalkEvent,
|
||||
build::GreenNodeBuilder,
|
||||
interning::{Resolver, TokenKey},
|
||||
syntax::{ResolvedNode, SyntaxNode},
|
||||
traversal::WalkEvent,
|
||||
util::NodeOrToken,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
use serde::{
|
||||
de::{Error, SeqAccess, Visitor},
|
||||
|
@ -77,8 +81,8 @@ enum Event<'text> {
|
|||
/// The second parameter indicates if this node needs data.
|
||||
/// If the boolean is true, the next element inside the data list
|
||||
/// must be attached to this node.
|
||||
EnterNode(SyntaxKind, bool),
|
||||
Token(SyntaxKind, &'text str),
|
||||
EnterNode(RawSyntaxKind, bool),
|
||||
Token(RawSyntaxKind, &'text str),
|
||||
LeaveNode,
|
||||
}
|
||||
|
||||
|
@ -97,7 +101,7 @@ pub(crate) struct SerializeWithData<'node, 'resolver, L: Language, D: 'static, R
|
|||
impl<L, D, R> Serialize for SerializeWithData<'_, '_, L, D, R>
|
||||
where
|
||||
L: Language,
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
D: Serialize,
|
||||
{
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
|
@ -112,7 +116,7 @@ where
|
|||
impl<L, D, R> Serialize for SerializeWithResolver<'_, '_, L, D, R>
|
||||
where
|
||||
L: Language,
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
|
@ -192,8 +196,7 @@ where
|
|||
}
|
||||
|
||||
let (tree, cache) = builder.finish();
|
||||
let tree =
|
||||
ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap().into_resolver());
|
||||
let tree = ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap());
|
||||
Ok((tree, data_indices))
|
||||
}
|
||||
}
|
||||
|
@ -236,7 +239,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl Serialize for SyntaxKind {
|
||||
impl Serialize for RawSyntaxKind {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
|
@ -245,7 +248,7 @@ impl Serialize for SyntaxKind {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for SyntaxKind {
|
||||
impl<'de> Deserialize<'de> for RawSyntaxKind {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
use std::{fmt, sync::atomic::AtomicU32};
|
||||
|
||||
use lasso::Resolver;
|
||||
use text_size::{TextRange, TextSize};
|
||||
|
||||
use super::*;
|
||||
use crate::{green::GreenElementRef, Language, NodeOrToken, SyntaxKind, TokenAtOffset};
|
||||
use crate::{
|
||||
green::GreenElementRef,
|
||||
interning::{Resolver, TokenKey},
|
||||
util::{NodeOrToken, TokenAtOffset},
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
|
||||
/// An element of the tree, can be either a node or a token.
|
||||
pub type SyntaxElement<L, D = ()> = NodeOrToken<SyntaxNode<L, D>, SyntaxToken<L, D>>;
|
||||
|
@ -27,7 +31,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElement.html#method.write_display).
|
||||
pub fn display<R>(&self, resolver: &R) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.display(resolver),
|
||||
|
@ -38,7 +42,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
||||
|
@ -53,7 +57,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElement.html#method.write_debug).
|
||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
||||
|
@ -66,7 +70,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
/// Otherwise, only this element's kind and range are written.
|
||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
||||
|
@ -105,7 +109,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
|||
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElementRef.html#method.write_display).
|
||||
pub fn display<R>(&self, resolver: &R) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.display(resolver),
|
||||
|
@ -116,7 +120,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
|||
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
|
||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.write_display(resolver, target),
|
||||
|
@ -131,7 +135,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
|||
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElementRef.html#method.write_debug).
|
||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.debug(resolver, recursive),
|
||||
|
@ -144,7 +148,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
|||
/// Otherwise, only this element's kind and range are written.
|
||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
|
||||
|
@ -162,8 +166,8 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
ref_count: *mut AtomicU32,
|
||||
) -> SyntaxElement<L, D> {
|
||||
match element {
|
||||
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index as u32, offset, ref_count).into(),
|
||||
NodeOrToken::Token(_) => SyntaxToken::new(parent, index as u32, offset).into(),
|
||||
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index, offset, ref_count).into(),
|
||||
NodeOrToken::Token(_) => SyntaxToken::new(parent, index, offset).into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -178,7 +182,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
|
|||
|
||||
/// The internal representation of the kind of this element.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||
|
@ -261,7 +265,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
|
|||
|
||||
/// The internal representation of the kind of this element.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||
|
|
|
@ -4,7 +4,11 @@ use std::iter::FusedIterator;
|
|||
|
||||
use text_size::TextSize;
|
||||
|
||||
use crate::{green::GreenElementRef, GreenNodeChildren, Language, SyntaxElementRef, SyntaxNode};
|
||||
use crate::{
|
||||
green::{GreenElementRef, GreenNodeChildren},
|
||||
syntax::{SyntaxElementRef, SyntaxNode},
|
||||
Language,
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct Iter<'n> {
|
||||
|
|
|
@ -36,6 +36,7 @@ pub use text::SyntaxText;
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::testing::*;
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -2,9 +2,12 @@ use super::*;
|
|||
#[cfg(feature = "serialize")]
|
||||
use crate::serde_impls::{SerializeWithData, SerializeWithResolver};
|
||||
use crate::{
|
||||
green::{GreenElementRef, SyntaxKind},
|
||||
interning::Resolver,
|
||||
*,
|
||||
green::{GreenElementRef, GreenNode},
|
||||
interning::{Resolver, TokenKey},
|
||||
text::*,
|
||||
traversal::*,
|
||||
util::*,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use std::{
|
||||
|
@ -39,7 +42,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
/// Otherwise, only this node's kind and range are written.
|
||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
if recursive {
|
||||
let mut level = 0;
|
||||
|
@ -71,7 +74,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
#[inline]
|
||||
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
// NOTE: `fmt::Write` methods on `String` never fail
|
||||
let mut res = String::new();
|
||||
|
@ -82,7 +85,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
/// Writes this node's [`Display`](fmt::Display) representation into the given `target`.
|
||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
self.preorder_with_tokens()
|
||||
.filter_map(|event| match event {
|
||||
|
@ -98,7 +101,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
#[inline]
|
||||
pub fn display<R>(&self, resolver: &R) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
// NOTE: `fmt::Write` methods on `String` never fail
|
||||
let mut res = String::new();
|
||||
|
@ -107,21 +110,22 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
}
|
||||
|
||||
/// If there is a resolver associated with this tree, returns it.
|
||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
|
||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
|
||||
match &self.root().data().kind {
|
||||
Kind::Root(_, resolver) => resolver.as_ref(),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns this node into a [`ResolvedNode`], but only if there is a resolver associated with this tree.
|
||||
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode), but only if there is a resolver associated
|
||||
/// with this tree.
|
||||
#[inline]
|
||||
pub fn try_resolved(&self) -> Option<&ResolvedNode<L, D>> {
|
||||
// safety: we only coerce if `resolver` exists
|
||||
self.resolver().map(|_| unsafe { ResolvedNode::coerce_ref(self) })
|
||||
}
|
||||
|
||||
/// Turns this node into a [`ResolvedNode`].
|
||||
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode).
|
||||
/// # Panics
|
||||
/// If there is no resolver associated with this tree.
|
||||
#[inline]
|
||||
|
@ -233,7 +237,7 @@ impl<L: Language, D> Hash for SyntaxNode<L, D> {
|
|||
}
|
||||
|
||||
enum Kind<L: Language, D: 'static> {
|
||||
Root(GreenNode, Option<StdArc<dyn Resolver>>),
|
||||
Root(GreenNode, Option<StdArc<dyn Resolver<TokenKey>>>),
|
||||
Child {
|
||||
parent: SyntaxNode<L, D>,
|
||||
index: u32,
|
||||
|
@ -300,7 +304,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
Self { data }
|
||||
}
|
||||
|
||||
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver>>) -> Self {
|
||||
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver<TokenKey>>>) -> Self {
|
||||
let ref_count = Box::new(AtomicU32::new(1));
|
||||
let n_children = green.children().count();
|
||||
let data = NodeData::new(
|
||||
|
@ -328,6 +332,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
/// # Example
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// use cstree::syntax::ResolvedNode;
|
||||
///
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||
/// builder.start_node(Root);
|
||||
/// builder.token(Identifier, "content");
|
||||
|
@ -342,8 +348,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
/// assert_eq!(root.text(), "content");
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> ResolvedNode<L, D> {
|
||||
let ptr: StdArc<dyn Resolver> = StdArc::new(resolver);
|
||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> ResolvedNode<L, D> {
|
||||
let ptr: StdArc<dyn Resolver<TokenKey>> = StdArc::new(resolver);
|
||||
ResolvedNode {
|
||||
syntax: SyntaxNode::make_new_root(green, Some(ptr)),
|
||||
}
|
||||
|
@ -517,7 +523,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
|
||||
/// The internal representation of the kind of this node.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
self.green().kind()
|
||||
}
|
||||
|
||||
|
@ -543,7 +549,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
|
|||
#[inline]
|
||||
pub fn resolve_text<'n, 'i, I>(&'n self, resolver: &'i I) -> SyntaxText<'n, 'i, I, L, D>
|
||||
where
|
||||
I: Resolver + ?Sized,
|
||||
I: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
SyntaxText::new(self, resolver)
|
||||
}
|
||||
|
@ -911,7 +917,7 @@ where
|
|||
/// including the data and by using an external resolver.
|
||||
pub fn as_serialize_with_data_with_resolver<'node>(
|
||||
&'node self,
|
||||
resolver: &'node impl Resolver,
|
||||
resolver: &'node impl Resolver<TokenKey>,
|
||||
) -> impl serde::Serialize + 'node
|
||||
where
|
||||
D: serde::Serialize,
|
||||
|
@ -923,7 +929,7 @@ where
|
|||
/// which uses the given resolver instead of the resolver inside the tree.
|
||||
pub fn as_serialize_with_resolver<'node>(
|
||||
&'node self,
|
||||
resolver: &'node impl Resolver,
|
||||
resolver: &'node impl Resolver<TokenKey>,
|
||||
) -> impl serde::Serialize + 'node {
|
||||
SerializeWithResolver { node: self, resolver }
|
||||
}
|
||||
|
|
|
@ -9,12 +9,15 @@ use std::{
|
|||
sync::Arc as StdArc,
|
||||
};
|
||||
|
||||
use lasso::Resolver;
|
||||
use text_size::{TextRange, TextSize};
|
||||
|
||||
use crate::{
|
||||
Direction, GreenNode, Language, NodeOrToken, SyntaxElementRef, SyntaxKind, SyntaxNode, SyntaxText, SyntaxToken,
|
||||
TokenAtOffset, WalkEvent,
|
||||
green::GreenNode,
|
||||
interning::{Resolver, TokenKey},
|
||||
syntax::*,
|
||||
traversal::*,
|
||||
util::*,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
|
||||
/// Syntax tree node that is guaranteed to belong to a tree that contains an associated
|
||||
|
@ -109,7 +112,7 @@ impl<L: Language, D> DerefMut for ResolvedToken<L, D> {
|
|||
/// An element of the tree that is guaranteed to belong to a tree that contains an associated
|
||||
/// [`Resolver`](lasso::Resolver), can be either a node or a token.
|
||||
/// # See also
|
||||
/// [`SyntaxElement`](crate::SyntaxElement)
|
||||
/// [`SyntaxElement`](crate::syntax::SyntaxElement)
|
||||
pub type ResolvedElement<L, D = ()> = NodeOrToken<ResolvedNode<L, D>, ResolvedToken<L, D>>;
|
||||
|
||||
impl<L: Language, D> From<ResolvedNode<L, D>> for ResolvedElement<L, D> {
|
||||
|
@ -126,7 +129,7 @@ impl<L: Language, D> From<ResolvedToken<L, D>> for ResolvedElement<L, D> {
|
|||
|
||||
impl<L: Language, D> ResolvedElement<L, D> {
|
||||
#[allow(missing_docs)]
|
||||
pub fn display(&self, resolver: &impl Resolver) -> String {
|
||||
pub fn display(&self, resolver: &impl Resolver<TokenKey>) -> String {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.display(resolver),
|
||||
NodeOrToken::Token(it) => it.display(resolver),
|
||||
|
@ -177,7 +180,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
|
|||
/// source text covered by this node, i.e. the combined text of all token leafs of the subtree
|
||||
/// originating in this node.
|
||||
#[inline]
|
||||
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver, L, D> {
|
||||
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver<TokenKey>, L, D> {
|
||||
SyntaxText::new(self, &**self.resolver())
|
||||
}
|
||||
}
|
||||
|
@ -266,13 +269,13 @@ macro_rules! forward_node {
|
|||
|
||||
impl<L: Language, D> ResolvedNode<L, D> {
|
||||
/// Returns the [`Resolver`] associated with this tree.
|
||||
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
|
||||
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
|
||||
self.syntax.resolver().unwrap()
|
||||
}
|
||||
|
||||
/// See [`SyntaxNode::new_root_with_resolver`].
|
||||
#[inline]
|
||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> Self {
|
||||
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> Self {
|
||||
SyntaxNode::new_root_with_resolver(green, resolver)
|
||||
}
|
||||
|
||||
|
@ -498,7 +501,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
|
|||
|
||||
impl<L: Language, D> ResolvedToken<L, D> {
|
||||
/// Returns the [`Resolver`] associated with this tree.
|
||||
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
|
||||
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
|
||||
self.syntax.resolver().unwrap()
|
||||
}
|
||||
|
||||
|
@ -575,7 +578,7 @@ impl<L: Language, D> ResolvedElement<L, D> {
|
|||
|
||||
/// The internal representation of the kind of this element.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||
|
@ -658,7 +661,7 @@ impl<'a, L: Language, D> ResolvedElementRef<'a, L, D> {
|
|||
|
||||
/// The internal representation of the kind of this element.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
match self {
|
||||
NodeOrToken::Node(it) => it.syntax_kind(),
|
||||
NodeOrToken::Token(it) => it.syntax_kind(),
|
||||
|
|
|
@ -2,7 +2,12 @@
|
|||
|
||||
use std::fmt;
|
||||
|
||||
use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, TextSize};
|
||||
use crate::{
|
||||
interning::{Resolver, TokenKey},
|
||||
syntax::{SyntaxNode, SyntaxToken},
|
||||
text::{TextRange, TextSize},
|
||||
Language,
|
||||
};
|
||||
|
||||
/// An efficient representation of the text that is covered by a [`SyntaxNode`], i.e. the combined
|
||||
/// source text of all tokens that are descendants of the node.
|
||||
|
@ -14,7 +19,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
|||
/// # Example
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// # use cstree::interning::IntoResolver;
|
||||
/// # use cstree::syntax::ResolvedNode;
|
||||
/// #
|
||||
/// fn parse_float_literal(s: &str) -> ResolvedNode<MyLanguage> {
|
||||
/// // parsing...
|
||||
|
@ -23,7 +28,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
|||
/// # builder.token(Float, s);
|
||||
/// # builder.finish_node();
|
||||
/// # let (root, cache) = builder.finish();
|
||||
/// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
|
||||
/// # let resolver = cache.unwrap().into_interner().unwrap();
|
||||
/// # SyntaxNode::new_root_with_resolver(root, resolver)
|
||||
/// }
|
||||
/// let float_node = parse_float_literal("2.748E2");
|
||||
|
@ -42,7 +47,7 @@ pub struct SyntaxText<'n, 'i, I: ?Sized, L: Language, D: 'static = ()> {
|
|||
resolver: &'i I,
|
||||
}
|
||||
|
||||
impl<'n, 'i, I: Resolver + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
|
||||
impl<'n, 'i, I: Resolver<TokenKey> + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
|
||||
pub(crate) fn new(node: &'n SyntaxNode<L, D>, resolver: &'i I) -> Self {
|
||||
let range = node.text_range();
|
||||
SyntaxText { node, range, resolver }
|
||||
|
@ -203,25 +208,25 @@ fn found<T>(res: Result<(), T>) -> Option<T> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(&self.to_string(), f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
|
||||
fn from(text: SyntaxText<'_, '_, I, L, D>) -> String {
|
||||
text.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
|
||||
fn eq(&self, mut rhs: &str) -> bool {
|
||||
self.try_for_each_chunk(|chunk| {
|
||||
if !rhs.starts_with(chunk) {
|
||||
|
@ -235,19 +240,19 @@ impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_,
|
|||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
|
||||
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
||||
rhs == self
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
|
||||
fn eq(&self, rhs: &&str) -> bool {
|
||||
self == *rhs
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
|
||||
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
|
||||
rhs == self
|
||||
}
|
||||
|
@ -258,8 +263,8 @@ impl<'n1, 'i1, 'n2, 'i2, I1, I2, L1, L2, D1, D2> PartialEq<SyntaxText<'n2, 'i2,
|
|||
where
|
||||
L1: Language,
|
||||
L2: Language,
|
||||
I1: Resolver + ?Sized,
|
||||
I2: Resolver + ?Sized,
|
||||
I1: Resolver<TokenKey> + ?Sized,
|
||||
I2: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool {
|
||||
if self.range.len() != other.range.len() {
|
||||
|
@ -282,8 +287,8 @@ fn zip_texts<'it1, 'it2, It1, It2, I1, I2, L1, L2, D1, D2>(
|
|||
where
|
||||
It1: Iterator<Item = (&'it1 SyntaxToken<L1, D1>, TextRange)>,
|
||||
It2: Iterator<Item = (&'it2 SyntaxToken<L2, D2>, TextRange)>,
|
||||
I1: Resolver + ?Sized,
|
||||
I2: Resolver + ?Sized,
|
||||
I1: Resolver<TokenKey> + ?Sized,
|
||||
I2: Resolver<TokenKey> + ?Sized,
|
||||
D1: 'static,
|
||||
D2: 'static,
|
||||
L1: Language + 'it1,
|
||||
|
@ -309,12 +314,12 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<I: Resolver + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
|
||||
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
|
||||
|
||||
mod private {
|
||||
use std::ops;
|
||||
|
||||
use crate::{TextRange, TextSize};
|
||||
use crate::text::{TextRange, TextSize};
|
||||
|
||||
pub trait SyntaxTextRange {
|
||||
fn start(&self) -> Option<TextSize>;
|
||||
|
@ -374,27 +379,27 @@ mod private {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{green::SyntaxKind, GreenNodeBuilder};
|
||||
use crate::{build::GreenNodeBuilder, RawSyntaxKind};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub enum TestLang {}
|
||||
impl Language for TestLang {
|
||||
type Kind = SyntaxKind;
|
||||
type Kind = RawSyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
raw
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
kind
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
if kind == SyntaxKind(1) {
|
||||
if kind == RawSyntaxKind(1) {
|
||||
Some("{")
|
||||
} else if kind == SyntaxKind(2) {
|
||||
} else if kind == RawSyntaxKind(2) {
|
||||
Some("}")
|
||||
} else {
|
||||
None
|
||||
|
@ -402,16 +407,16 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver) {
|
||||
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver<TokenKey>) {
|
||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||
builder.start_node(SyntaxKind(62));
|
||||
builder.start_node(RawSyntaxKind(62));
|
||||
for &chunk in chunks.iter() {
|
||||
let kind = match chunk {
|
||||
"{" => 1,
|
||||
"}" => 2,
|
||||
_ => 3,
|
||||
};
|
||||
builder.token(SyntaxKind(kind), chunk);
|
||||
builder.token(RawSyntaxKind(kind), chunk);
|
||||
}
|
||||
builder.finish_node();
|
||||
let (node, cache) = builder.finish();
|
||||
|
|
|
@ -5,11 +5,15 @@ use std::{
|
|||
sync::Arc as StdArc,
|
||||
};
|
||||
|
||||
use lasso::Resolver;
|
||||
use text_size::{TextRange, TextSize};
|
||||
|
||||
use super::*;
|
||||
use crate::{interning::Key, Direction, GreenNode, GreenToken, Language, SyntaxKind};
|
||||
use crate::{
|
||||
green::{GreenNode, GreenToken},
|
||||
interning::{Resolver, TokenKey},
|
||||
traversal::Direction,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
|
||||
/// Syntax tree token.
|
||||
#[derive(Debug)]
|
||||
|
@ -49,7 +53,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
/// Writes this token's [`Debug`](fmt::Debug) representation into the given `target`.
|
||||
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
write!(target, "{:?}@{:?}", self.kind(), self.text_range())?;
|
||||
let text = self.resolve_text(resolver);
|
||||
|
@ -72,7 +76,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
#[inline]
|
||||
pub fn debug<R>(&self, resolver: &R) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
// NOTE: `fmt::Write` methods on `String` never fail
|
||||
let mut res = String::new();
|
||||
|
@ -84,7 +88,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
#[inline]
|
||||
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
write!(target, "{}", self.resolve_text(resolver))
|
||||
}
|
||||
|
@ -95,25 +99,26 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
#[inline]
|
||||
pub fn display<R>(&self, resolver: &R) -> String
|
||||
where
|
||||
R: Resolver + ?Sized,
|
||||
R: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
self.resolve_text(resolver).to_string()
|
||||
}
|
||||
|
||||
/// If there is a resolver associated with this tree, returns it.
|
||||
#[inline]
|
||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
|
||||
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
|
||||
self.parent.resolver()
|
||||
}
|
||||
|
||||
/// Turns this token into a [`ResolvedToken`], but only if there is a resolver associated with this tree.
|
||||
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken), but only if there is a resolver
|
||||
/// associated with this tree.
|
||||
#[inline]
|
||||
pub fn try_resolved(&self) -> Option<&ResolvedToken<L, D>> {
|
||||
// safety: we only coerce if `resolver` exists
|
||||
self.resolver().map(|_| unsafe { ResolvedToken::coerce_ref(self) })
|
||||
}
|
||||
|
||||
/// Turns this token into a [`ResolvedToken`].
|
||||
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken).
|
||||
/// # Panics
|
||||
/// If there is no resolver associated with this tree.
|
||||
#[inline]
|
||||
|
@ -153,7 +158,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
|
||||
/// The internal representation of the kind of this token.
|
||||
#[inline]
|
||||
pub fn syntax_kind(&self) -> SyntaxKind {
|
||||
pub fn syntax_kind(&self) -> RawSyntaxKind {
|
||||
self.green().kind()
|
||||
}
|
||||
|
||||
|
@ -176,7 +181,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
#[inline]
|
||||
pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str
|
||||
where
|
||||
I: Resolver + ?Sized,
|
||||
I: Resolver<TokenKey> + ?Sized,
|
||||
{
|
||||
// one of the two must be present upon construction
|
||||
self.static_text().or_else(|| self.green().text(resolver)).unwrap()
|
||||
|
@ -191,6 +196,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
///
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// # use cstree::build::*;
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
|
||||
/// # builder.start_node(Root);
|
||||
/// # builder.token(Identifier, "x");
|
||||
|
@ -278,18 +284,18 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
/// implementation by re-using the interner in both.
|
||||
/// ```
|
||||
/// # use cstree::testing::*;
|
||||
/// use cstree::interning::{new_interner, Hasher, Key, Rodeo};
|
||||
/// use cstree::interning::{new_interner, TokenInterner, TokenKey};
|
||||
/// struct TypeTable {
|
||||
/// // ...
|
||||
/// }
|
||||
/// impl TypeTable {
|
||||
/// fn type_of(&self, ident: Key) -> &str {
|
||||
/// fn type_of(&self, ident: TokenKey) -> &str {
|
||||
/// // ...
|
||||
/// # ""
|
||||
/// }
|
||||
/// }
|
||||
/// # struct State {
|
||||
/// # interner: Rodeo,
|
||||
/// # interner: TokenInterner,
|
||||
/// # type_table: TypeTable,
|
||||
/// # }
|
||||
/// let interner = new_interner();
|
||||
|
@ -297,7 +303,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
/// interner,
|
||||
/// type_table: TypeTable{ /* stuff */},
|
||||
/// };
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> =
|
||||
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
|
||||
/// GreenNodeBuilder::with_interner(&mut state.interner);
|
||||
/// # let input = "";
|
||||
/// # builder.start_node(Root);
|
||||
|
@ -315,7 +321,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
|
|||
/// let typ = type_table.type_of(ident.text_key().unwrap());
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn text_key(&self) -> Option<Key> {
|
||||
pub fn text_key(&self) -> Option<TokenKey> {
|
||||
self.green().text_key()
|
||||
}
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
|
|||
fn deref(&self) -> &T {
|
||||
match self {
|
||||
MaybeOwned::Owned(it) => it,
|
||||
MaybeOwned::Borrowed(it) => *it,
|
||||
MaybeOwned::Borrowed(it) => it,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
|
|||
fn deref_mut(&mut self) -> &mut T {
|
||||
match self {
|
||||
MaybeOwned::Owned(it) => it,
|
||||
MaybeOwned::Borrowed(it) => *it,
|
||||
MaybeOwned::Borrowed(it) => it,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
use super::*;
|
||||
use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind, TextRange};
|
||||
use lasso::{Resolver, Rodeo};
|
||||
use cstree::{
|
||||
build::{GreenNodeBuilder, NodeCache},
|
||||
interning::{new_interner, Resolver},
|
||||
text::TextRange,
|
||||
RawSyntaxKind,
|
||||
};
|
||||
|
||||
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<D>, impl Resolver) {
|
||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||
|
@ -31,20 +35,20 @@ fn tree_with_eq_tokens() -> Element<'static> {
|
|||
fn create() {
|
||||
let tree = two_level_tree();
|
||||
let (tree, resolver) = build_tree::<()>(&tree);
|
||||
assert_eq!(tree.syntax_kind(), SyntaxKind(0));
|
||||
assert_eq!(tree.kind(), SyntaxKind(0));
|
||||
assert_eq!(tree.syntax_kind(), RawSyntaxKind(0));
|
||||
assert_eq!(tree.kind(), RawSyntaxKind(0));
|
||||
{
|
||||
let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap();
|
||||
let leaf1_0 = leaf1_0.into_token().unwrap();
|
||||
assert_eq!(leaf1_0.syntax_kind(), SyntaxKind(5));
|
||||
assert_eq!(leaf1_0.kind(), SyntaxKind(5));
|
||||
assert_eq!(leaf1_0.syntax_kind(), RawSyntaxKind(5));
|
||||
assert_eq!(leaf1_0.kind(), RawSyntaxKind(5));
|
||||
assert_eq!(leaf1_0.resolve_text(&resolver), "1.0");
|
||||
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
||||
}
|
||||
{
|
||||
let node2 = tree.children().nth(2).unwrap();
|
||||
assert_eq!(node2.syntax_kind(), SyntaxKind(6));
|
||||
assert_eq!(node2.kind(), SyntaxKind(6));
|
||||
assert_eq!(node2.syntax_kind(), RawSyntaxKind(6));
|
||||
assert_eq!(node2.kind(), RawSyntaxKind(6));
|
||||
assert_eq!(node2.children_with_tokens().count(), 3);
|
||||
assert_eq!(node2.resolve_text(&resolver), "2.02.12.2");
|
||||
}
|
||||
|
@ -54,7 +58,7 @@ fn create() {
|
|||
fn token_text_eq() {
|
||||
let tree = tree_with_eq_tokens();
|
||||
let (tree, _) = build_tree::<()>(&tree);
|
||||
assert_eq!(tree.kind(), SyntaxKind(0));
|
||||
assert_eq!(tree.kind(), RawSyntaxKind(0));
|
||||
|
||||
let leaf0_0 = tree.children().next().unwrap().children_with_tokens().next().unwrap();
|
||||
let leaf0_0 = leaf0_0.into_token().unwrap();
|
||||
|
@ -115,7 +119,7 @@ fn data() {
|
|||
|
||||
#[test]
|
||||
fn with_interner() {
|
||||
let mut interner = Rodeo::new();
|
||||
let mut interner = new_interner();
|
||||
let mut cache = NodeCache::with_interner(&mut interner);
|
||||
let tree = two_level_tree();
|
||||
let tree = build_tree_with_cache(&tree, &mut cache);
|
||||
|
@ -135,7 +139,7 @@ fn with_interner() {
|
|||
|
||||
#[test]
|
||||
fn inline_resolver() {
|
||||
let mut interner = Rodeo::new();
|
||||
let mut interner = new_interner();
|
||||
let mut cache = NodeCache::with_interner(&mut interner);
|
||||
let tree = two_level_tree();
|
||||
let tree = build_tree_with_cache(&tree, &mut cache);
|
||||
|
@ -146,7 +150,7 @@ fn inline_resolver() {
|
|||
assert_eq!(leaf1_0.text(), "1.0");
|
||||
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
|
||||
assert_eq!(format!("{}", leaf1_0), leaf1_0.text());
|
||||
assert_eq!(format!("{:?}", leaf1_0), "SyntaxKind(5)@6..9 \"1.0\"");
|
||||
assert_eq!(format!("{:?}", leaf1_0), "RawSyntaxKind(5)@6..9 \"1.0\"");
|
||||
}
|
||||
{
|
||||
let node2 = tree.children().nth(2).unwrap();
|
||||
|
@ -154,13 +158,13 @@ fn inline_resolver() {
|
|||
let resolver = node2.resolver();
|
||||
assert_eq!(node2.resolve_text(resolver.as_ref()), node2.text());
|
||||
assert_eq!(format!("{}", node2).as_str(), node2.text());
|
||||
assert_eq!(format!("{:?}", node2), "SyntaxKind(6)@9..18");
|
||||
assert_eq!(format!("{:?}", node2), "RawSyntaxKind(6)@9..18");
|
||||
assert_eq!(
|
||||
format!("{:#?}", node2),
|
||||
r#"SyntaxKind(6)@9..18
|
||||
SyntaxKind(7)@9..12 "2.0"
|
||||
SyntaxKind(8)@12..15 "2.1"
|
||||
SyntaxKind(9)@15..18 "2.2"
|
||||
r#"RawSyntaxKind(6)@9..18
|
||||
RawSyntaxKind(7)@9..12 "2.0"
|
||||
RawSyntaxKind(8)@12..15 "2.1"
|
||||
RawSyntaxKind(9)@15..18 "2.2"
|
||||
"#
|
||||
);
|
||||
}
|
||||
|
@ -175,7 +179,7 @@ fn assert_debug_display() {
|
|||
f::<ResolvedToken>();
|
||||
f::<ResolvedElement>();
|
||||
f::<ResolvedElementRef<'static>>();
|
||||
f::<cstree::NodeOrToken<String, u128>>();
|
||||
f::<cstree::util::NodeOrToken<String, u128>>();
|
||||
|
||||
fn dbg<T: fmt::Debug>() {}
|
||||
dbg::<GreenNodeBuilder<'static, 'static, TestLang>>();
|
||||
|
|
|
@ -4,18 +4,22 @@ mod sendsync;
|
|||
#[cfg(feature = "serialize")]
|
||||
mod serde;
|
||||
|
||||
use cstree::{GreenNode, GreenNodeBuilder, Language, NodeCache, SyntaxKind};
|
||||
use lasso::Interner;
|
||||
use cstree::{
|
||||
build::{GreenNodeBuilder, NodeCache},
|
||||
green::GreenNode,
|
||||
interning::Interner,
|
||||
Language, RawSyntaxKind,
|
||||
};
|
||||
|
||||
pub type SyntaxNode<D = ()> = cstree::SyntaxNode<TestLang, D>;
|
||||
pub type SyntaxToken<D = ()> = cstree::SyntaxToken<TestLang, D>;
|
||||
pub type SyntaxElement<D = ()> = cstree::SyntaxElement<TestLang, D>;
|
||||
pub type SyntaxElementRef<'a, D = ()> = cstree::SyntaxElementRef<'a, TestLang, D>;
|
||||
pub type SyntaxNode<D = ()> = cstree::syntax::SyntaxNode<TestLang, D>;
|
||||
pub type SyntaxToken<D = ()> = cstree::syntax::SyntaxToken<TestLang, D>;
|
||||
pub type SyntaxElement<D = ()> = cstree::syntax::SyntaxElement<TestLang, D>;
|
||||
pub type SyntaxElementRef<'a, D = ()> = cstree::syntax::SyntaxElementRef<'a, TestLang, D>;
|
||||
|
||||
pub type ResolvedNode<D = ()> = cstree::ResolvedNode<TestLang, D>;
|
||||
pub type ResolvedToken<D = ()> = cstree::ResolvedToken<TestLang, D>;
|
||||
pub type ResolvedElement<D = ()> = cstree::ResolvedElement<TestLang, D>;
|
||||
pub type ResolvedElementRef<'a, D = ()> = cstree::ResolvedElementRef<'a, TestLang, D>;
|
||||
pub type ResolvedNode<D = ()> = cstree::syntax::ResolvedNode<TestLang, D>;
|
||||
pub type ResolvedToken<D = ()> = cstree::syntax::ResolvedToken<TestLang, D>;
|
||||
pub type ResolvedElement<D = ()> = cstree::syntax::ResolvedElement<TestLang, D>;
|
||||
pub type ResolvedElementRef<'a, D = ()> = cstree::syntax::ResolvedElementRef<'a, TestLang, D>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Element<'s> {
|
||||
|
@ -26,13 +30,13 @@ pub enum Element<'s> {
|
|||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
pub enum TestLang {}
|
||||
impl Language for TestLang {
|
||||
type Kind = SyntaxKind;
|
||||
type Kind = RawSyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
raw
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
kind
|
||||
}
|
||||
|
||||
|
@ -41,7 +45,7 @@ impl Language for TestLang {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
|
||||
pub fn build_tree_with_cache<I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
|
||||
where
|
||||
I: Interner,
|
||||
{
|
||||
|
@ -52,25 +56,21 @@ where
|
|||
node
|
||||
}
|
||||
|
||||
pub fn build_recursive<'c, 'i, L, I>(
|
||||
root: &Element<'_>,
|
||||
builder: &mut GreenNodeBuilder<'c, 'i, L, I>,
|
||||
mut from: u16,
|
||||
) -> u16
|
||||
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
|
||||
where
|
||||
L: Language<Kind = SyntaxKind>,
|
||||
L: Language<Kind = RawSyntaxKind>,
|
||||
I: Interner,
|
||||
{
|
||||
match root {
|
||||
Element::Node(children) => {
|
||||
builder.start_node(SyntaxKind(from));
|
||||
builder.start_node(RawSyntaxKind(from));
|
||||
for child in children {
|
||||
from = build_recursive(child, builder, from + 1);
|
||||
}
|
||||
builder.finish_node();
|
||||
}
|
||||
Element::Token(text) => {
|
||||
builder.token(SyntaxKind(from), *text);
|
||||
builder.token(RawSyntaxKind(from), text);
|
||||
}
|
||||
}
|
||||
from
|
||||
|
|
|
@ -3,7 +3,7 @@ fn empty_tree_arc() {
|
|||
// this test is not here for the test itself, but to run it through MIRI, who complained about out-of-bound
|
||||
// `ThinArc` pointers for a root `GreenNode` with no children
|
||||
|
||||
use cstree::*;
|
||||
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
|
@ -16,13 +16,13 @@ fn empty_tree_arc() {
|
|||
// ...
|
||||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= SyntaxKind::Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
|
||||
cstree::SyntaxKind(kind as u16)
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
cstree::RawSyntaxKind(kind as u16)
|
||||
}
|
||||
|
||||
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
||||
|
|
|
@ -4,13 +4,29 @@ use crossbeam_utils::thread::scope;
|
|||
use std::{thread, time::Duration};
|
||||
|
||||
use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang};
|
||||
use cstree::{interning::IntoResolver, GreenNodeBuilder};
|
||||
use cstree::build::GreenNodeBuilder;
|
||||
|
||||
// Excercise the multi-threaded interner when the corresponding feature is enabled.
|
||||
|
||||
#[cfg(feature = "multi_threaded_interning")]
|
||||
use cstree::interning::{new_threaded_interner, MultiThreadedTokenInterner};
|
||||
|
||||
#[cfg(not(feature = "multi_threaded_interning"))]
|
||||
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang> {
|
||||
GreenNodeBuilder::new()
|
||||
}
|
||||
|
||||
#[cfg(feature = "multi_threaded_interning")]
|
||||
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang, MultiThreadedTokenInterner> {
|
||||
let interner = new_threaded_interner();
|
||||
GreenNodeBuilder::from_interner(interner)
|
||||
}
|
||||
|
||||
fn build_tree<D>(root: &Element<'_>) -> ResolvedNode<D> {
|
||||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||
let mut builder = get_builder();
|
||||
build_recursive(root, &mut builder, 0);
|
||||
let (node, cache) = builder.finish();
|
||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
|
||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
|
||||
}
|
||||
|
||||
fn two_level_tree() -> Element<'static> {
|
||||
|
|
|
@ -2,8 +2,9 @@ use crate::{build_recursive, build_tree_with_cache, ResolvedNode};
|
|||
|
||||
use super::{Element, SyntaxNode, TestLang};
|
||||
use cstree::{
|
||||
interning::{new_interner, IntoResolver},
|
||||
GreenNodeBuilder, NodeCache, NodeOrToken,
|
||||
build::{GreenNodeBuilder, NodeCache},
|
||||
interning::new_interner,
|
||||
util::NodeOrToken,
|
||||
};
|
||||
use serde_test::Token;
|
||||
use std::fmt;
|
||||
|
@ -227,7 +228,7 @@ fn build_tree(root: Element<'_>) -> ResolvedNode<String> {
|
|||
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
|
||||
build_recursive(&root, &mut builder, 0);
|
||||
let (node, cache) = builder.finish();
|
||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
|
||||
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
|
||||
}
|
||||
|
||||
fn attach_data(node: &SyntaxNode<String>) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue