1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

Set up a module structure (#44)

This commit is contained in:
DQ 2023-04-07 18:06:51 +02:00 committed by GitHub
parent baa0a9f2f0
commit 16f7a3bd80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
38 changed files with 2291 additions and 454 deletions

View file

@ -31,8 +31,19 @@ jobs:
- uses: hecrj/setup-rust-action@v1
with:
rust-version: ${{ matrix.rust }}
- run: cargo test --verbose --all-features
- run: cargo test --release --verbose --all-features
- uses: actions-rs/cargo@v1
with:
command: test
args: --all-targets --verbose
- uses: actions-rs/cargo@v1
with:
command: test
args: --all-targets --verbose --all-features
- uses: actions-rs/cargo@v1
with:
command: test
args: --all-targets --verbose --all-features --release
check:
name: Check
@ -48,6 +59,7 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: check
args: --all-targets --all-features
clippy:
name: Clippy
@ -79,11 +91,13 @@ jobs:
name: Check doc links
runs-on: ubuntu-latest
env:
RUSTDOCFLAGS: -Dwarnings
RUSTDOCFLAGS: -Dwarnings --cfg doc_cfg
steps:
- uses: actions/checkout@v2
- uses: hecrj/setup-rust-action@v1
with:
rust-version: nightly
- run: cargo doc --all-features --document-private-items --no-deps
miri-test:

View file

@ -2,7 +2,52 @@
## `v0.12.0`
* Documentation has been improved in most areas, together with a switch to a more principled module structure that allows explicitly documenting submodules.
* The `interning` module has been rewritten. It now provides functions for obtaining a default interner (`new_interner` and `new_threaded_interner`) and provides a small, dependency-free interner implementation.
* Compatibility with other interners can be enabled via feature flags.
* **Note** that compatibility with `lasso` is not enabled by default. Use the `lasso_compat` feature to match the previous default.
* Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens).
* Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens.
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
* `cstree`
* `Language`
* `RawSyntaxKind`
* `build`
* `GreenNodeBuilder`
* `NodeCache`
* `Checkpoint`
* `green`
* `GreenNode`
* `GreenToken`
* `GreenNodeChildren`
* `syntax`
* `{Syntax,Resolved}Node`
* `{Syntax,Resolved}Token`
* `{Syntax,Resolved}Element`
* `{Syntax,Resolved}ElementRef`
* `SyntaxNodeChildren`
* `SyntaxElementChildren`
* `SyntaxText`
* `interning`
* `TokenKey` and the `InternKey` trait
* `Interner` and `Resolver` traits
* `new_interner` and `TokenInterner`
* `new_threaded_interner` and `MultiThreadedTokenInterner` (with the `multi_threaded_interning` feature enabled)
* compatibility implementations for interning crates depending on selected feature flags
* `text`
* `TextSize`
* `TextRange`
* `SyntaxText` (re-export)
* `traversal`
* `Direction`
* `WalkEvent`
* `util`
* `NodeOrToken`
* `TokenAtOffset`
* `sync`
* `Arc`
* `prelude`
* re-exports of the most-used items

View file

@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "cstree"
version = "0.11.1"
version = "0.12.0-rc.0" # when updating, also update `#![doc(html_root_url)]`
authors = [
"Domenic Quirl <DomenicQuirl@pm.me>",
"Aleksey Kladov <aleksey.kladov@gmail.com>",
@ -15,7 +15,6 @@ readme = "README.md"
debug = true
[dependencies]
lasso = { version = "0.6", features = ["inline-more", "multi-threaded"] }
text-size = "1.1.0"
fxhash = "0.2.1"
parking_lot = "0.11.2"
@ -24,6 +23,20 @@ parking_lot = "0.11.2"
triomphe = "0.1.7"
sptr = "0.3.2"
# Default Interner
indexmap = "1.9"
[dependencies.lasso]
version = "0.6"
features = ["inline-more"]
optional = true
[dependencies.salsa]
git = "https://github.com/salsa-rs/salsa/"
version = "0.1"
optional = true
package = "salsa-2022"
[dependencies.serde]
version = "1.0"
optional = true
@ -43,7 +56,24 @@ harness = false
[features]
default = []
serialize = ["serde", "lasso/serialize"]
# Implementations of `serde::{De,}Serialize` for CSTrees.
serialize = ["serde", "lasso?/serialize"]
# Interoperability with the `lasso` interning crate.
# When enabled, `cstree`'s default interners will use `lasso` internally, too.
lasso_compat = ["lasso"]
# Additionally provide threadsafe interner types.
# Where applicable (and if the corresponding features are selected), provide compatibility
# implementations for multi-thread interners from other crates.
multi_threaded_interning = ["lasso_compat", "lasso/multi-threaded"]
# Interoperability with the `salsa` framework for incremental computation.
# Use this feature for "Salsa 2022".
# WARNING: This feature is considered unstable!
salsa_2022_compat = ["salsa"]
[[example]]
name = "salsa"
required-features = ["salsa_2022_compat"]
[package.metadata.docs.rs]
features = ["serialize"]
all-features = true
rustdoc-args = ["--cfg", "doc_cfg"]

287
README.md
View file

@ -32,8 +32,291 @@ Notable differences of `cstree` compared to `rowan`:
- Performance optimizations for tree traversal: persisting red nodes allows tree traversal methods to return references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
## Getting Started
The main entry points for constructing syntax trees are `GreenNodeBuilder` and `SyntaxNode::new_root` for green and red trees respectively.
See `examples/s_expressions` for a guided tutorial to `cstree`.
If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
to happen to go from input text to a `cstree` syntax tree:
1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression")
that you want to have in your syntax and implement `Language`
2. Create a `GreenNodeBuilder` and call `start_node`, `token` and `finish_node` from your parser
3. Call `SyntaxNode::new_root` or `SyntaxNode::new_root_with_resolver` with the resulting
`GreenNode` to obtain a syntax tree that you can traverse
Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
### Defining the language
First, we need to list the different parts of our language's grammar.
We can do that using an `enum` with a unit variant for any terminal and non-terminal.
The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
representation.
```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
enum SyntaxKind {
/* Tokens */
Int, // 42
Plus, // +
Minus, // -
LParen, // (
RParen, // )
/* Nodes */
Expr,
Root,
}
```
Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
We only really need one type of node: expressions.
Our syntax tree's root node will have the special kind `Root`, all other nodes will be
expressions containing a sequence of arithmetic operations potentially involving further, nested
expression nodes.
To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
`#[repr(u16)]` that we added) by implementing the `Language` trait. We can also tell `cstree` about tokens that
always have the same text through the `static_text` method on the trait. This is useful for the operators and
parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Calculator;
impl Language for Calculator {
// The tokens and nodes we just defined
type Kind = SyntaxKind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
// This just needs to be the inverse of `kind_to_raw`, but could also
// be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
match raw.0 {
0 => SyntaxKind::Int,
1 => SyntaxKind::Plus,
2 => SyntaxKind::Minus,
3 => SyntaxKind::LParen,
4 => SyntaxKind::RParen,
5 => SyntaxKind::Expr,
6 => SyntaxKind::Root,
n => panic!("Unknown raw syntax kind: {n}"),
}
}
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16)
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {
match kind {
SyntaxKind::Plus => Some("+"),
SyntaxKind::Minus => Some("-"),
SyntaxKind::LParen => Some("("),
SyntaxKind::RParen => Some(")"),
_ => None,
}
}
}
```
### Parsing into a green tree
With that out of the way, we can start writing the parser for our expressions.
For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
tokens:
```rust
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'input> {
// Note that number strings are not yet parsed into actual numbers,
// we just remember the slice of the input that contains their digits
Int(&'input str),
Plus,
Minus,
LParen,
RParen,
// A special token that indicates that we have reached the end of the file
EoF,
}
```
A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
combination of `cstree` and the actual parser, which we define like this:
```rust
pub struct Parser<'input> {
// `Peekable` is a standard library iterator adapter that allows
// looking ahead at the next item without removing it from the iterator yet
lexer: Peekable<Lexer<'input>>,
builder: GreenNodeBuilder<'static, 'static, Calculator>,
}
impl<'input> Parser<'input> {
pub fn new(input: &'input str) -> Self {
Self {
// we get `peekable` from implementing `Iterator` on `Lexer`
lexer: Lexer::new(input).peekable(),
builder: GreenNodeBuilder::new(),
}
}
pub fn bump(&mut self) -> Option<Token<'input>> {
self.lexer.next()
}
}
```
In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes for
all elements in the language grammar will have the same type: `GreenNode` for the inner ("green")
tree and `SyntaxNode` for the outer ("red") tree. Different kinds of nodes (and tokens) are
differentiated by their `SyntaxKind` tag, which we defined above.
You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
a typical recursive descent parser. With a more traditional AST, one would define different AST
structs for struct or function definitions, statements, expressions and so on. Inside the
parser, the components of any element, such as all fields of a struct or all statements inside a
function, are parsed first and then the parser wraps them in the matching AST type, which is
returned from the corresponding parser function.
Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the parser
would build. Instead, parsing into a CST using the `GreenNodeBuilder` follows the source code more
closely in that you tell `cstree` about each new element you enter and all tokens that the parser
consumes. So, for example, to parse a struct definition the parser first "enters" the struct
definition node, then parses the `struct` keyword and type name, then parses each field, and finally
"finishes" parsing the struct node.
The most trivial example is the root node for our parser, which just creates a root node
containing the whole expression (we could do without a specific root node if any expression was
a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
```rust
pub fn parse(&mut self) -> Result<(), String> {
self.builder.start_node(SyntaxKind::Root);
self.parse_expr()?;
self.builder.finish_node();
Ok(())
}
```
As there isn't a static AST type to return, the parser is very flexible as to what is part of a
node. In the previous example, if the user is adding a new field to the struct and has not yet
typed the field's type, the CST node for the struct doesn't care if there is no child node for
it. Similarly, if the user is deleting fields and the source code currently contains a leftover
field name, this additional identifier can be a part of the struct node without any
modifications to the syntax tree definition. This property is the key to why CSTs are such a
good fit as a lossless input representation, which necessitates the syntax tree to mirror the
user-specific layout of whitespace and comments around the AST items.
In the parser for our simple expression language, we'll also have to deal with the fact that,
when we see a number the parser doesn't yet know whether there will be additional operations
following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
however, implies that when reaching the `+`, the parser would have to have already entered an
expression node in order for the whole input to be part of the expression.
To get around this, `GreenNodeBuilder` provides the `checkpoint` method, which we can call to
"remember" the current position in the input. For example, we can create a checkpoint before the
parser parses the first `1`. Later, when it sees the following `+`, it can create an `Expr` node
for the whole expression using `start_node_at`:
```rust
fn parse_lhs(&mut self) -> Result<(), String> {
// An expression may start either with a number, or with an opening parenthesis that is
// the start of a parenthesized expression
let next_token = *self.lexer.peek().unwrap();
match next_token {
Token::Int(n) => {
self.bump();
self.builder.token(SyntaxKind::Int, n);
}
Token::LParen => {
// Wrap the grouped expression inside a node containing it and its parentheses
self.builder.start_node(SyntaxKind::Expr);
self.bump();
self.builder.static_token(SyntaxKind::LParen);
self.parse_expr()?; // Inner expression
if self.bump() != Some(Token::RParen) {
return Err("Missing ')'".to_string());
}
self.builder.static_token(SyntaxKind::RParen);
self.builder.finish_node();
}
Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
t => return Err(format!("Unexpected start of expression: '{t:?}'")),
}
Ok(())
}
fn parse_expr(&mut self) -> Result<(), String> {
// Remember our current position
let before_expr = self.builder.checkpoint();
// Parse the start of the expression
self.parse_lhs()?;
// Check if the expression continues with `+ <more>` or `- <more>`
let Some(next_token) = self.lexer.peek() else {
return Ok(());
};
let op = match *next_token {
Token::Plus => SyntaxKind::Plus,
Token::Minus => SyntaxKind::Minus,
Token::RParen | Token::EoF => return Ok(()),
t => return Err(format!("Expected operator, found '{t:?}'")),
};
// If so, retroactively wrap the (already parsed) LHS and the following RHS
// inside an `Expr` node
self.builder.start_node_at(before_expr, SyntaxKind::Expr);
self.bump();
self.builder.static_token(op);
self.parse_expr()?; // RHS
self.builder.finish_node();
Ok(())
}
```
### Obtaining the parser result
Our parser is now capable of parsing our little arithmetic language, but its methods don't return
anything. So how do we get our syntax tree out? The answer lies in `GreenNodeBuilder::finish`, which
finally returns the tree that we have painstakingly constructed.
```rust
impl Parser<'_> {
pub fn finish(mut self) -> (GreenNode, impl Interner) {
assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
let (tree, cache) = self.builder.finish();
(tree, cache.unwrap().into_interner().unwrap())
}
}
```
`finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use it
for parsing related inputs (e.g., different source files from the same crate may share a lot of
common function and type names that can be deduplicated). See `GreenNodeBuilder`'s documentation for
more information on this, in particular the `with_cache` and `from_cache` methods. Most importantly
for us, we can extract the `Interner` that contains the source text of the tree's tokens from the
cache, which we need if we want to look up things like variable names or the value of numbers for
our calculator.
To work with the syntax tree, you'll want to upgrade it to a `SyntaxNode` using
`SyntaxNode::new_root`. You can also use `SyntaxNode::new_root_with_resolver` to combine tree and
interner, which lets you directly retrieve source text and makes the nodes implement `Display` and
`Debug`. The same output can be produced from `SyntaxNode`s by calling the `debug` or `display`
method with a `Resolver`. To visualize the whole syntax tree, pass `true` for the `recursive`
parameter on `debug`, or simply debug-print a `ResolvedNode`:
```rust
let input = "11 + 2-(5 + 4)";
let mut parser = Parser::new(input);
parser.parse().unwrap();
let (tree, interner) = parser.finish();
let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
dbg!(root);
```
## AST Layer
While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or an AST representation, or freely switch between them.

View file

@ -1,6 +1,10 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use cstree::*;
use lasso::{Interner, Rodeo};
use cstree::{
build::*,
green::GreenNode,
interning::{new_interner, Interner},
Language, RawSyntaxKind,
};
use std::{fmt, hash::Hash};
#[derive(Debug)]
@ -40,7 +44,7 @@ impl Bool for UseStaticText {
impl<T: Bool> Language for TestLang<T> {
type Kind = TestKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
if raw.0 == u16::MAX - 1 {
TestKind::Plus
} else {
@ -48,10 +52,10 @@ impl<T: Bool> Language for TestLang<T> {
}
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
match kind {
TestKind::Element { n } => SyntaxKind(n),
TestKind::Plus => SyntaxKind(u16::MAX - 1),
TestKind::Element { n } => RawSyntaxKind(n),
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
}
}
@ -67,7 +71,7 @@ impl<T: Bool> Language for TestLang<T> {
}
}
pub fn build_tree_with_cache<'c, 'i, T: Bool, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
pub fn build_tree_with_cache<T: Bool, I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
where
I: Interner,
{
@ -78,9 +82,9 @@ where
node
}
pub fn build_recursive<'c, 'i, T: Bool, I>(
pub fn build_recursive<T: Bool, I>(
root: &Element<'_>,
builder: &mut GreenNodeBuilder<'c, 'i, TestLang<T>, I>,
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
mut from: u16,
) -> u16
where
@ -95,7 +99,7 @@ where
builder.finish_node();
}
Element::Token(text) => {
builder.token(TestKind::Element { n: from }, *text);
builder.token(TestKind::Element { n: from }, text);
}
Element::Plus => {
builder.token(TestKind::Plus, "+");
@ -114,10 +118,15 @@ fn two_level_tree() -> Element<'static> {
}
pub fn create(c: &mut Criterion) {
let mut group = c.benchmark_group("two-level tree");
#[cfg(not(feature = "lasso_compat"))]
const GROUP_NAME: &str = "two-level tree (default interner)";
#[cfg(feature = "lasso_compat")]
const GROUP_NAME: &str = "two-level tree (lasso)";
let mut group = c.benchmark_group(GROUP_NAME);
group.throughput(Throughput::Elements(1));
let mut interner = Rodeo::new();
let mut interner = new_interner();
let mut cache = NodeCache::with_interner(&mut interner);
let tree = two_level_tree();

View file

@ -13,10 +13,7 @@
//! - "+" Token(Add)
//! - "4" Token(Number)
use cstree::{
interning::{IntoResolver, Resolver},
GreenNodeBuilder, NodeOrToken,
};
use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
use std::iter::Peekable;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -36,7 +33,7 @@ enum SyntaxKind {
}
use SyntaxKind::*;
impl From<SyntaxKind> for cstree::SyntaxKind {
impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
@ -47,12 +44,12 @@ enum Lang {}
impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
kind.into()
}
@ -67,12 +64,12 @@ impl cstree::Language for Lang {
}
}
type SyntaxNode = cstree::SyntaxNode<Lang>;
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
#[allow(unused)]
type SyntaxToken = cstree::SyntaxToken<Lang>;
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
#[allow(unused)]
type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
type SyntaxElement = cstree::util::NodeOrToken<SyntaxNode, SyntaxToken>;
type SyntaxElementRef<'a> = cstree::util::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
builder: GreenNodeBuilder<'static, 'static, Lang>,
@ -128,10 +125,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
self.builder.finish_node();
let (tree, cache) = self.builder.finish();
(
SyntaxNode::new_root(tree),
cache.unwrap().into_interner().unwrap().into_resolver(),
)
(SyntaxNode::new_root(tree), cache.unwrap().into_interner().unwrap())
}
}

334
examples/readme.rs Normal file
View file

@ -0,0 +1,334 @@
use std::{io::Write, iter::Peekable};
use cstree::{
interning::Interner,
prelude::*,
syntax::{ResolvedElementRef, ResolvedNode},
};
/// All tokens and nodes that can appear in a syntax tree of our calculator language.
/// `#[repr(u16)]` ensures each kind maps losslessly to a `u16`, which is what
/// `Language::kind_to_raw` converts it to below — so variant order is significant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum SyntaxKind {
    /* Tokens */
    Int,    // 42
    Plus,   // +
    Minus,  // -
    LParen, // (
    RParen, // )
    /* Nodes */
    Expr,
    Root,
}
/// Empty marker type on which we implement `Language`, connecting our
/// `SyntaxKind` to `cstree`. It exists only to give the language a name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Calculator;

impl Language for Calculator {
    // The tokens and nodes we just defined
    type Kind = SyntaxKind;

    /// Convert a raw `u16`-backed kind back into our `SyntaxKind`.
    fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
        // This just needs to be the inverse of `kind_to_raw`, but could also
        // be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
        match raw.0 {
            0 => SyntaxKind::Int,
            1 => SyntaxKind::Plus,
            2 => SyntaxKind::Minus,
            3 => SyntaxKind::LParen,
            4 => SyntaxKind::RParen,
            5 => SyntaxKind::Expr,
            6 => SyntaxKind::Root,
            n => panic!("Unknown raw syntax kind: {n}"),
        }
    }

    /// Convert to the raw representation; relies on `#[repr(u16)]` on `SyntaxKind`.
    fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
        RawSyntaxKind(kind as u16)
    }

    /// Kinds whose source text is always the same. Numbers (`Int`) vary per
    /// occurrence, so they (and the node kinds) return `None`.
    fn static_text(kind: Self::Kind) -> Option<&'static str> {
        match kind {
            SyntaxKind::Plus => Some("+"),
            SyntaxKind::Minus => Some("-"),
            SyntaxKind::LParen => Some("("),
            SyntaxKind::RParen => Some(")"),
            _ => None,
        }
    }
}
/// The tokens produced by [`Lexer`].
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'input> {
    // Number strings are not yet parsed into actual numbers;
    // we only remember the slice of the input that contains their digits.
    Int(&'input str),
    Plus,
    Minus,
    LParen,
    RParen,
    // Special token signalling that the end of the input has been reached.
    EoF,
}

/// A trivial hand-written lexer over the remaining input string.
pub struct Lexer<'input> {
    input: &'input str,
    at_eof: bool,
}

impl<'input> Lexer<'input> {
    pub fn new(input: &'input str) -> Self {
        Self { input, at_eof: false }
    }

    /// Scan the next token off the front of `self.input`, skipping whitespace.
    ///
    /// Returns `Token::EoF` once the input is exhausted, and an error message
    /// for any character that cannot start a token.
    fn next_token(&mut self) -> Result<Token<'input>, String> {
        loop {
            let Some(next_char) = self.input.chars().next() else {
                self.at_eof = true;
                return Ok(Token::EoF);
            };
            let token = match next_char {
                '+' => Token::Plus,
                '-' => Token::Minus,
                '(' => Token::LParen,
                ')' => Token::RParen,
                c if c.is_ascii_digit() => {
                    let (last_digit_idx, _char) = self
                        .input
                        .char_indices()
                        .take_while(|(_idx, c)| c.is_ascii_digit())
                        .last()
                        .expect("matched at least one");
                    // ASCII digits are always one byte wide, so `last_digit_idx + 1`
                    // is guaranteed to be a character boundary.
                    let number = Token::Int(&self.input[..=last_digit_idx]);
                    self.input = &self.input[(last_digit_idx + 1)..];
                    return Ok(number);
                }
                c if c.is_whitespace() => {
                    // Skip whitespace.
                    let (last_ws_idx, last_ws_char) = self
                        .input
                        .char_indices()
                        .take_while(|(_idx, c)| c.is_whitespace())
                        .last()
                        .expect("matched at least one");
                    // BUGFIX: `char::is_whitespace` matches multi-byte Unicode
                    // whitespace (e.g. U+00A0), so advancing by `last_ws_idx + 1`
                    // bytes could split a character and panic when slicing.
                    // Advance by the character's actual UTF-8 length instead.
                    self.input = &self.input[(last_ws_idx + last_ws_char.len_utf8())..];
                    continue;
                }
                c => return Err(format!("Unknown start of token: '{c}'")),
            };
            // All fixed tokens matched above are a single ASCII byte wide.
            self.input = &self.input[1..];
            return Ok(token);
        }
    }
}

impl<'input> Iterator for Lexer<'input> {
    type Item = Token<'input>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.at_eof {
            None
        } else {
            Some(self.next_token().expect("Failed to lex input"))
        }
    }
}
/// Recursive-descent parser over the lexed tokens that builds the green tree.
pub struct Parser<'input> {
    // `Peekable` lets the parser look at the next token without consuming it.
    lexer: Peekable<Lexer<'input>>,
    builder: GreenNodeBuilder<'static, 'static, Calculator>,
}
impl<'input> Parser<'input> {
    /// Create a parser (and its tree builder) over `input`.
    pub fn new(input: &'input str) -> Self {
        Self {
            lexer: Lexer::new(input).peekable(),
            builder: GreenNodeBuilder::new(),
        }
    }

    /// Advance the lexer, returning the consumed token (if any).
    pub fn bump(&mut self) -> Option<Token<'input>> {
        self.lexer.next()
    }

    /// Parse the whole input into a single `Root` node containing one expression.
    pub fn parse(&mut self) -> Result<(), String> {
        self.builder.start_node(SyntaxKind::Root);
        self.parse_expr()?;
        self.builder.finish_node();
        Ok(())
    }

    /// Parse the left-hand side of an expression: either an integer literal or
    /// a parenthesized sub-expression wrapped in its own `Expr` node.
    fn parse_lhs(&mut self) -> Result<(), String> {
        // An expression may start either with a number, or with an opening parenthesis that is the start of a
        // parenthesized expression
        let next_token = *self.lexer.peek().unwrap();
        match next_token {
            Token::Int(n) => {
                self.bump();
                // Dynamic text: pass the number's source slice to the builder.
                self.builder.token(SyntaxKind::Int, n);
            }
            Token::LParen => {
                // Wrap the grouped expression inside a node containing it and its parentheses
                self.builder.start_node(SyntaxKind::Expr);
                self.bump();
                self.builder.static_token(SyntaxKind::LParen);
                self.parse_expr()?; // Inner expression
                if self.bump() != Some(Token::RParen) {
                    return Err("Missing ')'".to_string());
                }
                self.builder.static_token(SyntaxKind::RParen);
                self.builder.finish_node();
            }
            Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
            t => return Err(format!("Unexpected start of expression: '{t:?}'")),
        }
        Ok(())
    }

    /// Parse one (possibly compound) expression. Uses a checkpoint so the
    /// already-parsed LHS can be wrapped retroactively once an operator is seen.
    fn parse_expr(&mut self) -> Result<(), String> {
        // Remember our current position
        let before_expr = self.builder.checkpoint();
        // Parse the start of the expression
        self.parse_lhs()?;
        // Check if the expression continues with `+ <more>` or `- <more>`
        let Some(next_token) = self.lexer.peek() else {
            return Ok(());
        };
        let op = match *next_token {
            Token::Plus => SyntaxKind::Plus,
            Token::Minus => SyntaxKind::Minus,
            Token::RParen | Token::EoF => return Ok(()),
            t => return Err(format!("Expected operator, found '{t:?}'")),
        };
        // If so, retroactively wrap the (already parsed) LHS and the following RHS inside an `Expr` node
        self.builder.start_node_at(before_expr, SyntaxKind::Expr);
        self.bump();
        self.builder.static_token(op);
        self.parse_expr()?; // RHS
        self.builder.finish_node();
        Ok(())
    }

    /// Finish parsing, returning the green tree and the interner that holds the
    /// deduplicated token texts. Asserts the input was fully consumed.
    pub fn finish(mut self) -> (GreenNode, impl Interner) {
        assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
        let (tree, cache) = self.builder.finish();
        // `cache` is `Some` because the builder owned it (we used `new`), and the
        // interner can be extracted for the same reason.
        (tree, cache.unwrap().into_interner().unwrap())
    }
}
/// Simple REPL: read a line, parse it into a syntax tree, evaluate, print.
fn main() {
    use std::io;
    let mut buf = String::new();
    loop {
        print!("Enter expression: ");
        io::stdout().flush().unwrap();
        buf.clear();
        if let Err(e) = io::stdin().read_line(&mut buf) {
            eprintln!("Error reading input: {e}");
            continue;
        }
        let mut parser = Parser::new(&buf);
        if let Err(e) = parser.parse() {
            eprintln!("Parse error: {e}");
            continue;
        }
        let (tree, interner) = parser.finish();
        // Bundle tree and interner so nodes can resolve their own text.
        let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
        if let Some(expr) = root.first_child_or_token() {
            // NOTE(review): the fresh iterator passed here still yields `expr` as its
            // first element; `eval_elem` only consumes it further for paren tokens —
            // confirm this is intended for token-only inputs.
            let result = eval_elem(expr, &mut root.children_with_tokens());
            println!("Result: {result}");
        }
    }
}
/// Evaluate an expression node by folding its children: LHS, then an optional
/// operator followed by an RHS.
fn eval(expr: &ResolvedNode<Calculator>) -> i64 {
    let mut parts = expr.children_with_tokens();
    let first = parts.next().expect("empty expr");
    let lhs = eval_elem(first, &mut parts);
    match parts.next().map(|elem| elem.kind()) {
        // No operator follows: this was a literal expression.
        None => lhs,
        Some(op) => {
            let rhs_elem = parts.next().expect("missing RHS");
            let rhs = eval_elem(rhs_elem, &mut parts);
            match op {
                SyntaxKind::Plus => lhs + rhs,
                SyntaxKind::Minus => lhs - rhs,
                _ => unreachable!("invalid op"),
            }
        }
    }
}
/// Evaluate a single child element of an expression.
///
/// `children` is the iterator over the *remaining* siblings of `expr`; it is
/// consumed further only in the parenthesis case, so that the grouped
/// expression and its closing `)` are not re-processed by the caller.
fn eval_elem<'e>(
    expr: ResolvedElementRef<'_, Calculator>,
    children: &mut impl Iterator<Item = ResolvedElementRef<'e, Calculator>>,
) -> i64 {
    use cstree::util::NodeOrToken;
    match expr {
        // A nested expression node: recurse into `eval`.
        NodeOrToken::Node(n) => {
            assert_eq!(n.kind(), SyntaxKind::Expr);
            eval(n)
        }
        NodeOrToken::Token(t) => match t.kind() {
            SyntaxKind::Int => {
                // `text()` is available directly because this is a *resolved* token.
                let number_str = t.text();
                number_str.parse().expect("parsed int could not be evaluated")
            }
            SyntaxKind::LParen => {
                let inner = children.next().expect("missing content inside parens");
                // It's important that we consume the `)` here, as otherwise `eval` might mistake it for an operator
                assert_eq!(
                    children
                        .next()
                        .and_then(|elem| elem.into_token())
                        .map(|token| token.kind()),
                    Some(SyntaxKind::RParen)
                );
                eval_elem(inner, children)
            }
            _ => unreachable!("invalid start of expression"),
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Lexing the README example input yields the expected token stream,
    // terminated by `EoF`.
    #[test]
    fn lex() {
        let input = "11 + 2-(5 + 4)";
        let lexer = Lexer::new(input);
        let tokens: Vec<_> = lexer.into_iter().collect();
        assert_eq!(
            tokens,
            vec![
                Token::Int("11"),
                Token::Plus,
                Token::Int("2"),
                Token::Minus,
                Token::LParen,
                Token::Int("5"),
                Token::Plus,
                Token::Int("4"),
                Token::RParen,
                Token::EoF
            ]
        );
    }

    // Parsing succeeds and the resulting resolved tree can be debug-printed
    // (exercises `new_root_with_resolver` and the `Debug` impl).
    #[test]
    fn parse() {
        let input = "11 + 2-(5 + 4)";
        let mut parser = Parser::new(input);
        parser.parse().unwrap();
        let (tree, interner) = parser.finish();
        let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
        dbg!(root);
    }
}

View file

@ -30,7 +30,7 @@ use SyntaxKind::*;
/// in order to not need the user's `enum SyntaxKind` as a type parameter.
///
/// First, to easily pass the enum variants into cstree via `.into()`:
impl From<SyntaxKind> for cstree::SyntaxKind {
impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
@ -44,12 +44,12 @@ pub enum Lang {}
impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
kind.into()
}
@ -66,14 +66,11 @@ impl cstree::Language for Lang {
/// offsets and parent pointers.
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
/// the Resolver to get the real text back from the interned representation.
use cstree::{
interning::{IntoResolver, Resolver},
GreenNode, Language,
};
use cstree::{green::GreenNode, interning::Resolver, Language};
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
/// a stack of currently in-progress nodes.
use cstree::GreenNodeBuilder;
use cstree::build::GreenNodeBuilder;
/// The parse results are stored as a "green tree".
/// We'll discuss how to work with the results later.
@ -135,7 +132,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
let (tree, cache) = self.builder.finish();
Parse {
green_node: tree,
resolver: cache.unwrap().into_interner().unwrap().into_resolver(),
resolver: cache.unwrap().into_interner().unwrap(),
errors: self.errors,
}
}
@ -213,11 +210,11 @@ fn parse(text: &str) -> Parse<impl Resolver> {
/// To work with the parse results we need a view into the green tree - the syntax tree.
/// It is also immutable, like a GreenNode, but it contains parent pointers, offsets, and has
/// identity semantics.
type SyntaxNode = cstree::SyntaxNode<Lang>;
type SyntaxNode = cstree::syntax::SyntaxNode<Lang>;
#[allow(unused)]
type SyntaxToken = cstree::SyntaxToken<Lang>;
type SyntaxToken = cstree::syntax::SyntaxToken<Lang>;
#[allow(unused)]
type SyntaxElement = cstree::SyntaxElement<Lang>;
type SyntaxElement = cstree::syntax::SyntaxElement<Lang>;
impl<I> Parse<I> {
fn syntax(&self) -> SyntaxNode {
@ -355,8 +352,10 @@ impl ast::Atom {
}
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
match &self.0.green().children().next() {
Some(cstree::NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
use cstree::util::NodeOrToken;
match self.0.green().children().next() {
Some(NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
.or_else(|| token.text(resolver))
.unwrap(),
_ => unreachable!(),
@ -422,7 +421,7 @@ nan
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
m_lexer::TokenKind(cstree::SyntaxKind::from(t).0)
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
}
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
match t.0 {

50
examples/salsa.rs Normal file
View file

@ -0,0 +1,50 @@
#![cfg(feature = "salsa_2022_compat")]
use cstree::{build::GreenNodeBuilder, impl_cstree_interning_for_salsa};
#[salsa::jar(db = Db)]
pub struct Jar(crate::SourceId);
pub trait Db: salsa::DbWithJar<Jar> {}
impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
#[salsa::interned]
pub struct SourceId {
#[return_ref]
pub text: String,
}
#[derive(Default)]
#[salsa::db(crate::Jar)]
struct Database {
storage: salsa::Storage<Self>,
}
impl salsa::Database for Database {}
impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
use cstree::{syntax::SyntaxNode, testing::*};
fn main() {
let db = Database::default();
let interned = SourceId::new(&db, "foo".to_string());
let original = interned.text(&db);
assert_eq!(original, "foo");
let interner = db.as_interner();
let mut shared_interner = &interner;
let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
let (tree, _no_interner_because_it_was_borrowed) = {
builder.start_node(TestSyntaxKind::Plus);
builder.token(TestSyntaxKind::Float, "2.05");
builder.token(TestSyntaxKind::Whitespace, " ");
builder.token(TestSyntaxKind::Plus, "+");
builder.token(TestSyntaxKind::Whitespace, " ");
builder.token(TestSyntaxKind::Float, "7.32");
builder.finish_node();
builder.finish()
};
let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
}

View file

@ -1,10 +1,9 @@
//! Implementation of the inner, "green" tree.
//! The [`GreenNodeBuilder`] is the main entry point to constructing [`GreenNode`]s and
//! [`GreenToken`]s.
//! The [`GreenNodeBuilder`](crate::build::GreenNodeBuilder) from the [`build` module](crate::build) is the main entry
//! point to constructing [`GreenNode`]s and [`GreenToken`]s.
mod builder;
pub(super) mod builder;
mod element;
mod interner;
mod iter;
mod node;
mod token;
@ -12,17 +11,7 @@ mod token;
pub(crate) use self::element::GreenElementRef;
use self::element::{GreenElement, PackedGreenElement};
pub use self::{
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
interner::TokenInterner,
iter::GreenNodeChildren,
node::GreenNode,
token::GreenToken,
};
/// SyntaxKind is a type tag for each token or node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SyntaxKind(pub u16);
pub use self::{iter::GreenNodeChildren, node::GreenNode, token::GreenToken};
#[cfg(test)]
mod tests {

View file

@ -4,10 +4,11 @@ use fxhash::{FxHashMap, FxHasher32};
use text_size::TextSize;
use crate::{
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
interning::{Interner, Key},
green::{GreenElement, GreenNode, GreenToken},
interning::{new_interner, Interner, TokenInterner, TokenKey},
util::NodeOrToken,
utility_types::MaybeOwned,
Language, NodeOrToken,
Language, RawSyntaxKind,
};
use super::{node::GreenNodeHead, token::GreenTokenData};
@ -35,6 +36,8 @@ impl NodeCache<'static> {
/// # Examples
/// ```
/// # use cstree::testing::{*, Language as _};
/// use cstree::build::NodeCache;
///
/// // Build a tree
/// let mut cache = NodeCache::new();
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::with_cache(&mut cache);
@ -53,7 +56,7 @@ impl NodeCache<'static> {
Self {
nodes: FxHashMap::default(),
tokens: FxHashMap::default(),
interner: MaybeOwned::Owned(TokenInterner::new()),
interner: MaybeOwned::Owned(new_interner()),
}
}
}
@ -66,19 +69,21 @@ impl Default for NodeCache<'static> {
impl<'i, I> NodeCache<'i, I>
where
I: Interner,
I: Interner<TokenKey>,
{
/// Constructs a new, empty cache that will use the given interner to deduplicate source text
/// (strings) across tokens.
/// # Examples
/// ```
/// # use cstree::testing::{*, Language as _};
/// use lasso::Rodeo;
/// # use cstree::interning::*;
/// use cstree::build::NodeCache;
///
/// // Create the builder from a custom `Rodeo`
/// let mut interner = Rodeo::new();
/// // Create the builder from a custom interner
/// let mut interner = new_interner();
/// let mut cache = NodeCache::with_interner(&mut interner);
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::with_cache(&mut cache);
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
/// GreenNodeBuilder::with_cache(&mut cache);
///
/// // Construct the tree
/// # builder.start_node(Root);
@ -107,12 +112,14 @@ where
/// # Examples
/// ```
/// # use cstree::testing::{*, Language as _};
/// use lasso::Rodeo;
/// # use cstree::interning::*;
/// use cstree::build::NodeCache;
///
/// // Create the builder from a custom `Rodeo`
/// let mut interner = Rodeo::new();
/// // Create the builder from a custom interner
/// let mut interner = new_interner();
/// let cache = NodeCache::from_interner(interner);
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::from_cache(cache);
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
/// GreenNodeBuilder::from_cache(cache);
///
/// // Construct the tree
/// # builder.start_node(Root);
@ -142,22 +149,23 @@ where
/// See also [`interner_mut`](NodeCache::interner_mut).
#[inline]
pub fn interner(&self) -> &I {
&*self.interner
&self.interner
}
/// Get a mutable reference to the interner used to deduplicate source text (strings).
/// # Examples
/// ```
/// # use cstree::*;
/// # use cstree::build::*;
/// # use cstree::interning::*;
/// let mut cache = NodeCache::new();
/// let interner = cache.interner_mut();
/// let key = interner.get_or_intern("foo");
/// assert_eq!(interner.resolve(&key), "foo");
/// assert_eq!(interner.resolve(key), "foo");
/// ```
#[inline]
pub fn interner_mut(&mut self) -> &mut I {
&mut *self.interner
&mut self.interner
}
/// If this node cache was constructed with [`new`](NodeCache::new) or
@ -196,7 +204,7 @@ where
}
#[inline(always)]
fn intern(&mut self, text: &str) -> Key {
fn intern(&mut self, text: &str) -> TokenKey {
self.interner.get_or_intern(text)
}
@ -205,7 +213,7 @@ where
#[inline]
fn get_cached_node(
&mut self,
kind: SyntaxKind,
kind: RawSyntaxKind,
children: std::vec::Drain<'_, GreenElement>,
text_len: TextSize,
child_hash: u32,
@ -221,7 +229,7 @@ where
.clone()
}
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<Key>, len: u32) -> GreenToken {
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<TokenKey>, len: u32) -> GreenToken {
let text_len = TextSize::from(len);
let kind = L::kind_to_raw(kind);
let data = GreenTokenData { kind, text, text_len };
@ -246,7 +254,6 @@ pub struct Checkpoint(usize);
/// # Examples
/// ```
/// # use cstree::testing::{*, Language as _};
/// # use cstree::interning::IntoResolver;
/// // Build a tree
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// builder.start_node(Root);
@ -258,7 +265,7 @@ pub struct Checkpoint(usize);
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
/// let resolver = cache.unwrap().into_interner().unwrap();
/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42"));
/// ```
#[derive(Debug)]
@ -288,7 +295,7 @@ impl<L: Language> Default for GreenNodeBuilder<'static, 'static, L> {
impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I>
where
L: Language,
I: Interner,
I: Interner<TokenKey>,
{
/// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally
/// share underlying trees.
@ -306,6 +313,7 @@ where
/// # Examples
/// ```
/// # use cstree::testing::{*, Language as _};
/// # use cstree::build::*;
/// // Construct a builder from our own cache
/// let cache = NodeCache::new();
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::from_cache(cache);
@ -358,7 +366,7 @@ where
/// See also [`interner_mut`](GreenNodeBuilder::interner_mut).
#[inline]
pub fn interner(&self) -> &I {
&*self.cache.interner
&self.cache.interner
}
/// Get a mutable reference to the interner used to deduplicate source text (strings).
@ -367,20 +375,19 @@ where
/// # Examples
/// ```
/// # use cstree::testing::*;
/// # use cstree::build::*;
/// # use cstree::interning::*;
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// let interner = builder.interner_mut();
/// let key = interner.get_or_intern("foo");
/// assert_eq!(interner.resolve(&key), "foo");
/// assert_eq!(interner.resolve(key), "foo");
/// ```
#[inline]
pub fn interner_mut(&mut self) -> &mut I {
&mut *self.cache.interner
&mut self.cache.interner
}
/// Add a new token to the current branch without storing an explicit section of text.
/// This is be useful if the text can always be inferred from the token's `kind`, for example
/// when using kinds for specific operators or punctuation.
/// Add a new token with the given `text` to the current node.
///
/// ## Panics
/// In debug mode, if `kind` has static text, this function will verify that `text` matches that text.
@ -403,6 +410,22 @@ where
self.children.push(token.into());
}
/// Add a new token to the current node without storing an explicit section of text.
/// This is useful if the text can always be inferred from the token's `kind`, for example
/// when using kinds for specific operators or punctuation.
///
/// For tokens whose textual representation is not static, such as numbers or identifiers, use
/// [`token`](GreenNodeBuilder::token).
///
/// ## Panics
/// If `kind` does not have static text, i.e., `L::static_text(kind)` returns `None`.
#[inline]
pub fn static_token(&mut self, kind: L::Kind) {
let static_text = L::static_text(kind).unwrap_or_else(|| panic!("Missing static text for '{kind:?}'"));
let token = self.cache.token::<L>(kind, None, static_text.len() as u32);
self.children.push(token.into());
}
/// Start new node of the given `kind` and make it current.
#[inline]
pub fn start_node(&mut self, kind: L::Kind) {
@ -427,7 +450,7 @@ where
/// # Examples
/// ```
/// # use cstree::testing::*;
/// # use cstree::{GreenNodeBuilder, Language};
/// # use cstree::{build::GreenNodeBuilder, Language};
/// # struct Parser;
/// # impl Parser {
/// # fn peek(&self) -> Option<TestSyntaxKind> { None }

View file

@ -7,8 +7,10 @@ type ErasedPtr = *const u8;
use sptr::Strict;
use crate::{
green::{GreenNode, GreenToken, SyntaxKind},
NodeOrToken, TextSize,
green::{GreenNode, GreenToken},
text::TextSize,
util::NodeOrToken,
RawSyntaxKind,
};
pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
@ -64,7 +66,7 @@ impl From<GreenToken> for PackedGreenElement {
impl GreenElement {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> SyntaxKind {
pub fn kind(&self) -> RawSyntaxKind {
self.as_ref().kind()
}
@ -78,7 +80,7 @@ impl GreenElement {
impl GreenElementRef<'_> {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> SyntaxKind {
pub fn kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.kind(),
NodeOrToken::Token(it) => it.kind(),

View file

@ -1,126 +0,0 @@
use std::num::NonZeroUsize;
use crate::interning::{
Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Key, Reader, Resolver, Rodeo,
};
use fxhash::FxBuildHasher;
/// The default [`Interner`] used to deduplicate green token strings.
#[derive(Debug)]
pub struct TokenInterner {
rodeo: Rodeo,
}
impl TokenInterner {
pub(super) fn new() -> Self {
Self {
rodeo: Rodeo::with_capacity_and_hasher(
// capacity values suggested by author of `lasso`
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
FxBuildHasher::default(),
),
}
}
}
impl Resolver for TokenInterner {
#[inline]
fn resolve<'a>(&'a self, key: &Key) -> &'a str {
self.rodeo.resolve(key)
}
#[inline]
fn try_resolve<'a>(&'a self, key: &Key) -> Option<&'a str> {
self.rodeo.try_resolve(key)
}
#[inline]
unsafe fn resolve_unchecked<'a>(&'a self, key: &Key) -> &'a str {
self.rodeo.resolve_unchecked(key)
}
#[inline]
fn contains_key(&self, key: &Key) -> bool {
self.rodeo.contains_key(key)
}
#[inline]
fn len(&self) -> usize {
self.rodeo.len()
}
}
impl Reader for TokenInterner {
#[inline]
fn get(&self, val: &str) -> Option<Key> {
self.rodeo.get(val)
}
#[inline]
fn contains(&self, val: &str) -> bool {
self.rodeo.contains(val)
}
}
impl IntoResolver for TokenInterner {
type Resolver = <Rodeo as IntoResolver>::Resolver;
#[inline]
fn into_resolver(self) -> Self::Resolver
where
Self: 'static,
{
self.rodeo.into_resolver()
}
#[inline]
fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
where
Self: 'static,
{
Rodeo::into_resolver_boxed(Box::new(self.rodeo))
}
}
impl Interner for TokenInterner {
#[inline]
fn get_or_intern(&mut self, val: &str) -> Key {
self.rodeo.get_or_intern(val)
}
#[inline]
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Key> {
self.rodeo.try_get_or_intern(val)
}
#[inline]
fn get_or_intern_static(&mut self, val: &'static str) -> Key {
self.rodeo.get_or_intern_static(val)
}
#[inline]
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Key> {
self.rodeo.try_get_or_intern_static(val)
}
}
impl IntoReader for TokenInterner {
type Reader = <Rodeo as IntoReader>::Reader;
#[inline]
fn into_reader(self) -> Self::Reader
where
Self: 'static,
{
self.rodeo.into_reader()
}
fn into_reader_boxed(self: Box<Self>) -> Self::Reader
where
Self: 'static,
{
Rodeo::into_reader_boxed(Box::new(self.rodeo))
}
}
impl IntoReaderAndResolver for TokenInterner {}

View file

@ -4,7 +4,7 @@ use std::{iter::FusedIterator, slice};
use super::{element::PackedGreenElement, GreenElementRef};
/// An iterator over a [`GreenNode`](crate::GreenNode)'s children.
/// An iterator over a [`GreenNode`](crate::green::GreenNode)'s children.
#[derive(Debug, Clone)]
pub struct GreenNodeChildren<'a> {
pub(super) inner: slice::Iter<'a, PackedGreenElement>,

View file

@ -6,15 +6,16 @@ use std::{
use fxhash::FxHasher32;
use crate::{
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement, SyntaxKind},
TextSize,
green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement},
text::TextSize,
RawSyntaxKind,
};
use triomphe::{Arc, HeaderWithLength, ThinArc};
#[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(super) struct GreenNodeHead {
pub(super) kind: SyntaxKind,
pub(super) kind: RawSyntaxKind,
pub(super) text_len: TextSize,
pub(super) child_hash: u32,
}
@ -35,7 +36,7 @@ impl std::fmt::Debug for GreenNode {
impl GreenNode {
/// Creates a new Node.
#[inline]
pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode
pub fn new<I>(kind: RawSyntaxKind, children: I) -> GreenNode
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
@ -72,7 +73,7 @@ impl GreenNode {
/// Creates a new Node.
#[inline]
pub(super) fn new_with_len_and_hash<I>(
kind: SyntaxKind,
kind: RawSyntaxKind,
children: I,
text_len: TextSize,
child_hash: u32,
@ -115,9 +116,9 @@ impl GreenNode {
}
}
/// [`SyntaxKind`] of this node.
/// [`RawSyntaxKind`] of this node.
#[inline]
pub fn kind(&self) -> SyntaxKind {
pub fn kind(&self) -> RawSyntaxKind {
self.data.header.header.kind
}

View file

@ -1,9 +1,9 @@
use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull};
use crate::{
green::SyntaxKind,
interning::{Key, Resolver},
TextSize,
interning::{Resolver, TokenKey},
text::TextSize,
RawSyntaxKind,
};
use sptr::Strict;
use triomphe::Arc;
@ -11,8 +11,8 @@ use triomphe::Arc;
#[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub(super) struct GreenTokenData {
pub(super) kind: SyntaxKind,
pub(super) text: Option<Key>,
pub(super) kind: RawSyntaxKind,
pub(super) text: Option<TokenKey>,
pub(super) text_len: TextSize,
}
@ -54,9 +54,9 @@ impl GreenToken {
}
}
/// [`SyntaxKind`] of this Token.
/// [`RawSyntaxKind`] of this Token.
#[inline]
pub fn kind(&self) -> SyntaxKind {
pub fn kind(&self) -> RawSyntaxKind {
self.data().kind
}
@ -64,9 +64,9 @@ impl GreenToken {
#[inline]
pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str>
where
I: Resolver + ?Sized,
I: Resolver<TokenKey> + ?Sized,
{
self.data().text.map(|key| resolver.resolve(&key))
self.data().text.map(|key| resolver.resolve(key))
}
/// Returns the length of text covered by this token.
@ -80,7 +80,7 @@ impl GreenToken {
///
/// See also [`text`](GreenToken::text).
#[inline]
pub fn text_key(&self) -> Option<Key> {
pub fn text_key(&self) -> Option<TokenKey> {
self.data().text
}
}

View file

@ -1,47 +1,186 @@
//! Types and Traits for efficient String storage and deduplication.
//!
//! Interning functionality is provided by the [`lasso`](lasso) crate.
//! Because `cstree` is aimed at _concrete_ syntax trees that faithfully represent all of the original program input,
//! `cstree` asks for the text of each token when building a syntax tree. You'll notice this when looking at
//! [`GreenNodeBuilder::token`], which takes the kind of token and a reference to the text of the token in the source.
//!
//! Of course, there are tokens whose text will always be the same, such as punctuation (like a semicolon), keywords
//! (like `fn`), or operators (like `<=`). Use [`Language::static_text`] when implementing `Language` to make `cstree`
//! aware of such tokens.
//!
//! There is, however, another category of tokens whose text will appear repeatedly, but for which we cannot know the
//! text upfront. Any variable, type, or method that is user-defined will likely be named more than once, but there is
//! no way to know beforehand what names a user will choose.
//!
//! In order to avoid storing the source text for these tokens many times over, `cstree` _interns_ the text of its
//! tokens (if that text is not static). What this means is that each unique string is only stored once. When a new
//! token is added - say, a variable -, we check if we already know its contents (the variable name). If the text is
//! new, we save it and give it a unique Id. If we have seen the text before, we look up its unique Id and don't need to
//! keep the new data around. As an additional benefit, interning also makes it much cheaper to copy source text around
//! and also to compare it with other source text, since what is actually being copied or compared is just an integer.
//!
//! ## I just want to build a syntax tree
//!
//! If you don't want to worry about this for now, you (mostly) can! All required functionality is implemented in
//! `cstree` and you can just use [`GreenNodeBuilder::new`] to obtain a tree builder with everything set up (see the
//! [crate documentation] for more on how to get started). This will create an interner, which the builder returns
//! together with the syntax tree on [`finish`] as part of its node cache (call [`NodeCache::into_interner`] on the
//! result to get the interner out).
//!
//! Here begins the part where you do have to think about interning: `cstree` needs the interner you get when you want
//! to look at the source text for some part of the syntax tree, so you'll have to keep it around somehow until the
//! point where you need it.
//!
//! How best to do this depends on what you need the text for. If the code that accesses the text is close-by, it might
//! be enough to pass the return value to the functions that need it (within `cstree` or in your code). Other options
//! could be to store the interner together with the syntax tree. If you use [`SyntaxNode::new_root_with_resolver`], you
//! get a syntax tree that can handle text without any need to manage and pass an interner (the reason the method is
//! called `_with_resolver` and not `_with_interner` is that it doesn't actually need a full [`Interner`] -- once the
//! tree is created, no more text will be added, so it just needs to be able to look up text. This part is called a
//! [`Resolver`]). Or you could put the interner somewhere "global", where you can easily access it from anywhere.
//!
//! ## Using other interners
//!
//! By default, `cstree` uses its own, simple interner implementation. You can obtain an interner by calling
//! [`new_interner`], or bring your own by implementing the [`Resolver`] and [`Interner`] traits defined in this module.
//! Most methods in `cstree` require that you support interning [`TokenKey`]s. `TokenKey` implements [`InternKey`], so
//! your implementation can use that to convert to whatever types it uses for its internal representation. Note that
//! there is no way to change the size of the internal representation.
//!
//! ### `lasso`
//! Using features, you can enable support for some third-party interners. The primary one is [`lasso`], a crate focused
//! on efficient interning of text strings. This is enabled via the `lasso_compat` feature and adds the necessary trait
//! implementation to make `lasso`'s interners work with `cstree` (as well as a re-export of the matching version of
//! `lasso` here). If enabled, `cstree`'s built-in interning functionality is replaced with `lasso`'s more efficient one
//! transparently, so you'll now be returned a `lasso` interner from [`new_interner`].
//!
//! ### `salsa`
//! If you are using the "2022" version of the `salsa` incremental query framework, it is possible to use its interning
//! capabilities with `cstree` as well. Support for this is experimental, and you have to opt in via the
//! `salsa_2022_compat` feature. For instructions on how to do this, and whether you actually want to, please refer to
//! [the `salsa_compat` module documentation].
//!
//! ## Multi-threaded interners
//! If you want to use your interner on more than one thread, the interner needs to support interning new text through
//! shared access. With the `multi_threaded_interning` feature, you can get such an interner by calling
//! [`new_threaded_interner`]. The feature also enables support for `ThreadedRodeo`, the multi-threaded interner from
//! `lasso`.
//!
//! **You can pass a reference to that interner to anything that expects an [`Interner`]!**
//! While the interning methods on [`Interner`] require a `&mut self` to also work for single-threaded interners, both
//! [`Resolver`] and [`Interner`] will be implemented for `&interner` if `interner` is multi-threaded:
//!
//! ```
//! # use cstree::testing::{*, Language as _};
//! # use cstree::interning::*;
//!
//! let interner = new_threaded_interner();
//! let mut builder: GreenNodeBuilder<MyLanguage, &MultiThreadedTokenInterner> =
//! GreenNodeBuilder::from_interner(&interner);
//!
//! # builder.start_node(Root);
//! # builder.token(Int, "42");
//! # builder.finish_node();
//! parse(&mut builder, "42");
//! let (tree, cache) = builder.finish();
//!
//! // Note that we get a cache and interner back, because we passed an "owned" reference to `from_interner`
//! let used_interner = cache.unwrap().into_interner().unwrap();
//! assert_eq!(used_interner as *const _, &interner as *const _);
//!
//! let int = tree.children().next().unwrap();
//! assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
//! ```
//!
//! Here, we use `from_interner`, but pass it only a shared reference to "own". Take care to denote the type signature
//! of the `GreenNodeBuilder` appropriately.
//!
//! [crate documentation]: crate
//! [`Language::static_text`]: crate::Language::static_text
//! [`GreenNodeBuilder::token`]: crate::build::GreenNodeBuilder::token
//! [`GreenNodeBuilder::new`]: crate::build::GreenNodeBuilder::new
//! [`finish`]: crate::build::GreenNodeBuilder::finish
//! [`NodeCache::into_interner`]: crate::build::NodeCache::into_interner
//! [`SyntaxNode::new_root_with_resolver`]: crate::syntax::SyntaxNode::new_root_with_resolver
//! [`lasso`]: lasso
//! [the `salsa_compat` module documentation]: salsa_compat
pub use fxhash::FxBuildHasher as Hasher;
mod traits;
pub use self::traits::*;
pub use crate::green::TokenInterner;
mod default_interner;
/// The index type for all interners. Each key represents
pub type Key = lasso::Spur;
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
#[cfg(not(feature = "lasso_compat"))]
#[doc(inline)]
pub use default_interner::TokenInterner;
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
/// it with `O(1)` times. By default, `Rodeo` uses an [`fxhash`] [`Hasher`].
pub type Rodeo<S = Hasher> = lasso::Rodeo<Key, S>;
#[cfg(feature = "lasso_compat")]
mod lasso_compat;
/// Constructs a new, single-threaded interner.
#[cfg(feature = "lasso_compat")]
#[doc(inline)]
pub use lasso_compat::TokenInterner;
#[cfg(feature = "multi_threaded_interning")]
#[doc(inline)]
pub use lasso_compat::MultiThreadedTokenInterner;
#[cfg(feature = "lasso_compat")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
pub use lasso;
#[cfg(feature = "salsa_2022_compat")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub mod salsa_compat;
use core::fmt;
use std::num::NonZeroU32;
/// The intern key type for the source text of [`GreenToken`s](crate::green::GreenToken).
/// Each unique key uniquely identifies a deduplicated, interned source string.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct TokenKey {
inner: NonZeroU32,
}
// Safety: we match `+ 1` and `- 1`, so it is always possible to round-trip.
unsafe impl InternKey for TokenKey {
#[inline]
fn into_u32(self) -> u32 {
self.inner.get() - 1
}
fn try_from_u32(key: u32) -> Option<Self> {
(key < u32::MAX).then(|| Self {
// Safety: non-zero by increment.
// Overflow is impossible under the check above.
inner: unsafe { NonZeroU32::new_unchecked(key + 1) },
})
}
}
impl fmt::Debug for TokenKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_fmt(format_args!("TokenKey({})", self.inner))
}
}
/// Constructs a new, single-threaded [`Interner`](traits::Interner).
///
/// If you need the interner to be multi-threaded, see [`new_threaded_interner`].
#[inline]
pub fn new_interner() -> Rodeo {
Rodeo::with_hasher(Hasher::default())
pub fn new_interner() -> TokenInterner {
TokenInterner::new()
}
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique key to re-access
/// it with `O(1)` times. By default, `ThreadedRodeo` uses an [`fxhash`] [`Hasher`].
pub type ThreadedRodeo<S = Hasher> = lasso::ThreadedRodeo<Key, S>;
/// Constructs a new interner that can be used across multiple threads.
/// Constructs a new [`Interner`](traits::Interner) that can be used across multiple threads.
///
/// Note that you can use `&MultiThreadedTokenInterner` to access interning methods through a shared reference, as well as
/// construct new syntax trees. See [the module documentation](self) for more information and examples.
#[cfg(feature = "multi_threaded_interning")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
#[inline]
pub fn new_threaded_interner() -> ThreadedRodeo {
ThreadedRodeo::with_hasher(Hasher::default())
pub fn new_threaded_interner() -> MultiThreadedTokenInterner {
MultiThreadedTokenInterner::new()
}
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings, both
/// key to string resolution and string to key lookups.
///
/// The hasher is the same as the Rodeo or ThreadedRodeo that created it.
/// Can be acquired with the `into_reader` methods (see also [`IntoReader`]).
pub type RodeoReader<S = Hasher> = lasso::RodeoReader<Key, S>;
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings with
/// only key to string resolution.
///
/// Can be acquired with the `into_resolver` methods (see also [`IntoResolver`]).
pub type RodeoResolver = lasso::RodeoResolver<Key>;
pub use lasso::{Capacity, Iter, LassoError, LassoErrorKind, LassoResult, MemoryLimits, Strings};

View file

@ -0,0 +1,70 @@
#![cfg(not(feature = "lasso_compat"))]
use core::fmt;
use fxhash::FxBuildHasher as Hasher;
use indexmap::IndexSet;
use super::{InternKey, Interner, Resolver, TokenKey};
/// The default [`Interner`] used to deduplicate green token strings.
#[derive(Debug)]
pub struct TokenInterner {
id_set: IndexSet<String, Hasher>,
}
impl TokenInterner {
pub(in crate::interning) fn new() -> Self {
Self {
id_set: IndexSet::default(),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InternerError {
KeySpaceExhausted,
}
impl fmt::Display for InternerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InternerError::KeySpaceExhausted => write!(f, "key space exhausted"),
}
}
}
impl std::error::Error for InternerError {}
impl Resolver<TokenKey> for TokenInterner {
fn try_resolve(&self, key: TokenKey) -> Option<&str> {
let index = key.into_u32() as usize;
self.id_set.get_index(index).map(String::as_str)
}
}
// `TokenKey` can represent `1` to `u32::MAX` (due to the `NonZeroU32` niche), so `u32::MAX` elements.
// Set indices start at 0, so everything shifts down by 1.
const N_INDICES: usize = u32::MAX as usize;
impl Interner<TokenKey> for TokenInterner {
type Error = InternerError;
fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
if let Some(index) = self.id_set.get_index_of(text) {
let raw_key = u32::try_from(index).unwrap_or_else(|_| {
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
});
return Ok(TokenKey::try_from_u32(raw_key).unwrap_or_else(|| {
panic!("found interned text with invalid index `{index}` (index too high for keyspace)")
}));
} else if self.id_set.len() >= N_INDICES {
return Err(InternerError::KeySpaceExhausted);
}
let (index, added) = self.id_set.insert_full(text.to_string());
debug_assert!(added, "tried to intern duplicate text");
let raw_key = u32::try_from(index).unwrap_or_else(|_| panic!("interned `{index}` despite keyspace exhaustion"));
TokenKey::try_from_u32(raw_key).ok_or(InternerError::KeySpaceExhausted)
}
}

View file

@ -0,0 +1,9 @@
//! Bridge between `cstree`'s and `lasso`'s types and traits.
#![cfg(feature = "lasso_compat")]
mod token_interner;
#[doc(inline)]
pub use token_interner::*;
mod traits;

View file

@ -0,0 +1,109 @@
//! Default interner implementations based on `lasso`.
#![cfg(feature = "lasso_compat")]

use std::{hash::BuildHasher, num::NonZeroUsize};

use fxhash::FxBuildHasher as Hasher;
use lasso::{Capacity, Rodeo, ThreadedRodeo};

use crate::interning::{Interner, Resolver, TokenKey};

/// Default number of strings that the interner will initially allocate space for.
/// Value recommended by the author of `lasso`.
const DEFAULT_STRING_CAPACITY: usize = 512;

/// Default memory in bytes that the interner will initially allocate space for.
/// Value recommended by the author of `lasso`.
// SAFETY: `4096` is non-zero, so `new_unchecked` is sound here.
const DEFAULT_BYTE_CAPACITY: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4096) };
/// Implements [`Resolver<TokenKey>`] and [`Interner<TokenKey>`] for an interner type
/// with a `rodeo` field by delegating every method to the backing `lasso` interner.
///
/// An optional feature name gates the generated impls' documentation behind
/// `doc(cfg(feature = ...))`.
macro_rules! impl_traits {
    (for $interner:ty $(, if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Resolver<TokenKey> for $interner {
            #[inline]
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                self.rodeo.try_resolve(&key)
            }

            #[inline]
            fn resolve(&self, key: TokenKey) -> &str {
                self.rodeo.resolve(&key)
            }
        }

        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl Interner<TokenKey> for $interner {
            type Error = lasso::LassoError;

            #[inline]
            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                self.rodeo.try_get_or_intern(text)
            }

            #[inline]
            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                self.rodeo.get_or_intern(text)
            }
        }
    };
}
/// The default [`Interner`] used to deduplicate green token strings.
#[derive(Debug)]
pub struct TokenInterner {
    // The actual interner: `lasso`'s single-threaded `Rodeo` with this crate's default hasher.
    rodeo: Rodeo<TokenKey, Hasher>,
}

impl TokenInterner {
    /// Creates a new interner pre-sized with the default string and byte capacities.
    pub(in crate::interning) fn new() -> Self {
        Self {
            rodeo: Rodeo::with_capacity_and_hasher(
                Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
                Hasher::default(),
            ),
        }
    }

    /// Returns the [`Rodeo`] backing this interner.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))]
    #[inline]
    pub fn into_inner(self) -> Rodeo<TokenKey, impl BuildHasher> {
        self.rodeo
    }
}
impl_traits!(for TokenInterner);

// Re-export the threadsafe interner at module level when the feature is enabled.
#[cfg(feature = "multi_threaded_interning")]
pub use multi_threaded::MultiThreadedTokenInterner;
#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    /// A threadsafe [`Interner`] for deduplicating [`GreenToken`](crate::green::GreenToken) strings.
    ///
    /// Note that [`Interner`] and [`Resolver`] are also implemented for `&MultiThreadTokenInterner` so you can pass
    /// `&mut &interner` in shared contexts.
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    #[derive(Debug)]
    pub struct MultiThreadedTokenInterner {
        // `ThreadedRodeo` supports interning through `&self`, which enables the
        // `&MultiThreadedTokenInterner` trait impls generated below.
        rodeo: ThreadedRodeo<TokenKey, Hasher>,
    }

    impl MultiThreadedTokenInterner {
        /// Creates a new interner pre-sized with the default string and byte capacities.
        pub(in crate::interning) fn new() -> Self {
            Self {
                rodeo: ThreadedRodeo::with_capacity_and_hasher(
                    Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY),
                    Hasher::default(),
                ),
            }
        }
    }

    impl_traits!(for MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);

    impl_traits!(for &MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]);
}

View file

@ -0,0 +1,166 @@
#![cfg(feature = "lasso_compat")]

use core::fmt;
use std::hash::{BuildHasher, Hash};

use crate::interning::{
    traits::{InternKey, Interner, Resolver},
    TokenKey,
};

// Safety: `InternKey` has the same invariant as `lasso::Key`
unsafe impl lasso::Key for TokenKey {
    fn into_usize(self) -> usize {
        self.into_u32() as usize
    }

    // Returns `None` if `int` overflows `u32` or is not a valid raw `TokenKey`.
    fn try_from_usize(int: usize) -> Option<Self> {
        let raw_key = u32::try_from(int).ok()?;
        Self::try_from_u32(raw_key)
    }
}
/// Errors that can arise when using a `lasso` interner through `cstree`'s compatibility layer.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LassoCompatError {
    /// An error reported by the underlying `lasso` interner.
    LassoError(lasso::LassoError),
    /// A key produced by `lasso` could not be converted to a `cstree` intern key.
    KeyConversionError { lasso_key: usize },
}
impl From<lasso::LassoError> for LassoCompatError {
#[inline]
fn from(error: lasso::LassoError) -> Self {
Self::LassoError(error)
}
}
impl fmt::Display for LassoCompatError {
    /// Formats the error, delegating to `lasso`'s own message where one exists.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::LassoError(lasso_error) => write!(f, "{lasso_error}"),
            Self::KeyConversionError { lasso_key } => write!(
                f,
                "invalid key: failed to convert `lasso::Key` `{lasso_key}` to `InternKey`"
            ),
        }
    }
}
// The wrapped `lasso::LassoError` is surfaced via `Display`; no `source` override needed.
impl std::error::Error for LassoCompatError {}
/// Implements `cstree`'s [`Resolver`] for a `lasso` resolver type by first
/// converting the [`TokenKey`] to the resolver's own key type `K`.
///
/// An optional `where` clause adds extra bounds, and an optional feature name
/// gates the generated impl's documentation behind `doc(cfg(feature = ...))`.
macro_rules! compat_resolver {
    ($resolver:ident<K$(, $hasher:ident)?> $(where $($t:ident : $bound:ident),+)? $(if #[cfg(feature = $feature:literal)])?) => {
        $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])?
        impl<K$(, $hasher)?> Resolver<TokenKey> for lasso::$resolver<K$(, $hasher)?>
        where
            K: lasso::Key,
            $($($t: $bound),+)?
        {
            fn try_resolve(&self, key: TokenKey) -> Option<&str> {
                let raw_key = TokenKey::into_u32(key);
                let lasso_key = K::try_from_usize(raw_key as usize)?;
                <Self as lasso::Resolver<K>>::try_resolve(self, &lasso_key)
            }

            fn resolve(&self, key: TokenKey) -> &str {
                let raw_key = TokenKey::into_u32(key);
                // `unwrap_or_else` defers building the panic message to the failure path;
                // the previous `expect(&format!(..))` allocated it on every call
                // (clippy: `expect_fun_call`). This also matches `compat_interner!`'s style.
                let lasso_key = K::try_from_usize(raw_key as usize).unwrap_or_else(|| {
                    panic!("invalid key: failed to convert `{key:?}` to `lasso::Key`")
                });
                <Self as lasso::Resolver<K>>::resolve(self, &lasso_key)
            }
        }
    };
}
/// Implements `cstree`'s [`Interner`] for a `lasso` interner type, translating
/// between `lasso`'s key type `K` and [`TokenKey`] in both directions.
macro_rules! compat_interner {
    ($interner:ident<K, S> $(where $($t:ident : $bound:ident),+)? if #[cfg(feature = $feature:literal)]) => {
        #[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))]
        impl<K, S> Interner<TokenKey> for lasso::$interner<K, S>
        where
            K: lasso::Key,
            S: BuildHasher,
            $($($t: $bound),+)?
        {
            type Error = LassoCompatError;

            fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
                // Intern through `lasso` first, then convert the returned key to a `TokenKey`.
                let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
                let raw_key = K::into_usize(lasso_key);
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
            }

            fn get_or_intern(&mut self, text: &str) -> TokenKey {
                let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
                let raw_key = K::into_usize(lasso_key);
                // A conversion failure here is unrecoverable: `lasso` already interned the
                // text but the resulting key cannot be represented as a `TokenKey`.
                u32::try_from(raw_key)
                    .ok()
                    .and_then(TokenKey::try_from_u32)
                    .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                    .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
            }
        }
    };
}
// Resolution is available for `lasso`'s read-only and read-write single-threaded types.
compat_resolver!(RodeoReader<K, S> if #[cfg(feature = "lasso_compat")]);
compat_resolver!(RodeoResolver<K> if #[cfg(feature = "lasso_compat")]);
compat_resolver!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
// Of the single-threaded types, only `Rodeo` can also intern new strings.
compat_interner!(Rodeo<K, S> if #[cfg(feature = "lasso_compat")]);
#[cfg(feature = "multi_threaded_interning")]
mod multi_threaded {
    use super::*;

    compat_resolver!(ThreadedRodeo<K, S> where K: Hash, S: BuildHasher, S: Clone if #[cfg(feature = "multi_threaded_interning")]);
    compat_interner!(ThreadedRodeo<K, S> where K: Hash, S: Clone if #[cfg(feature = "multi_threaded_interning")]);

    // `ThreadedRodeo` can intern and resolve through a shared reference, so the traits
    // are additionally implemented for `&ThreadedRodeo` (enabling `&mut &rodeo` usage).
    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Resolver<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        #[inline]
        fn try_resolve(&self, key: TokenKey) -> Option<&str> {
            // Delegate to the impl on the owned type generated by `compat_resolver!` above.
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::try_resolve(self, key)
        }

        #[inline]
        fn resolve(&self, key: TokenKey) -> &str {
            <lasso::ThreadedRodeo<K, S> as Resolver<TokenKey>>::resolve(self, key)
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))]
    impl<K, S> Interner<TokenKey> for &lasso::ThreadedRodeo<K, S>
    where
        K: lasso::Key + Hash,
        S: BuildHasher + Clone,
    {
        type Error = <lasso::ThreadedRodeo<K, S> as Interner<TokenKey>>::Error;

        fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
            // Intern via `lasso`'s shared-reference impl, then convert the key as in
            // `compat_interner!`.
            let lasso_key = <Self as lasso::Interner<K>>::try_get_or_intern(self, text)?;
            let raw_key = K::into_usize(lasso_key);
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
        }

        fn get_or_intern(&mut self, text: &str) -> TokenKey {
            let lasso_key = <Self as lasso::Interner<K>>::get_or_intern(self, text);
            let raw_key = K::into_usize(lasso_key);
            // Same unrecoverable-failure reasoning as in `compat_interner!`: the text is
            // already interned in `lasso` by the time conversion can fail.
            u32::try_from(raw_key)
                .ok()
                .and_then(TokenKey::try_from_u32)
                .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key })
                .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})"))
        }
    }
}

View file

@ -0,0 +1,228 @@
//! # Using a `salsa` database as the interner for `cstree`
//!
//! <p
//! style="background:rgba(255,181,77,0.16);padding:0.75em;white-space:normal;font:inherit;">
//! <strong>Warning</strong>: Compatibility is only provided for "Salsa 2022".
//! This version is currently under active development and <code style="background:rgba(41,24,0,0.9);">cstree</code>'s
//! compatibility features are unstable until there is an official
//! release.
//! Older versions of `salsa` are not supported.
//! </p>
//!
//! If you are using the `salsa` query system, you already have access to an implementation of interning through
//! [`#[salsa::interned]`](macro@salsa::interned). This is all that is needed to use `cstree` and this module provides
//! the utilities needed to use `salsa`'s interners for working with syntax trees.
//!
//! Note that the primary benefit of this is that it avoids additional dependencies because it uses an interner that you
//! already depend on, but it can also be beneficial to use an interner that is more specialized towards string
//! interning. In particular, using `salsa`'s interning requires allocating all strings that are interned even if they
//! are deduplicated because they already exist in the interner.
//!
//! ## How to do it
//!
//! ```
//! # use cstree::testing::*;
//! # use cstree::interning::salsa_compat::salsa;
//! # use cstree::impl_cstree_interning_for_salsa;
//! // Define the `salsa` jar, database and intern Id
//! #[salsa::jar(db = Db)]
//! pub struct Jar(SourceId);
//!
//! pub trait Db: salsa::DbWithJar<Jar> {}
//! impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
//!
//! // If you are not a doctest and can put `Jar` at the root of your crate,
//! // this can just be `#[salsa::interned]`.
//! #[salsa::interned(jar = Jar)]
//! pub struct SourceId {
//! #[return_ref]
//! pub text: String,
//! }
//!
//! #[derive(Default)]
//! #[salsa::db(Jar)]
//! struct Database {
//! storage: salsa::Storage<Self>,
//! }
//! impl salsa::Database for Database {}
//!
//! // Let `cstree` define a conversion trait and implement it for your database.
//! // `Database` is your db type, `SourceId` is your interning id, and `text` is
//! // its text field (all as defined above).
//! impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
//!
//! // Build a tree with the `salsa` interner
//! let db = Database::default();
//! let interner = db.as_interner(); // <-- conversion happens here
//! let mut shared_interner = &interner;
//! let mut builder: GreenNodeBuilder<TestLang, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
//! let (tree, _no_interner_because_it_was_borrowed) = {
//! builder.start_node(TestSyntaxKind::Plus);
//! builder.token(TestSyntaxKind::Float, "2.05");
//! builder.token(TestSyntaxKind::Whitespace, " ");
//! builder.token(TestSyntaxKind::Plus, "+");
//! builder.token(TestSyntaxKind::Whitespace, " ");
//! builder.token(TestSyntaxKind::Float, "7.32");
//! builder.finish_node();
//! builder.finish()
//! };
//! let tree: SyntaxNode<TestLang> = SyntaxNode::new_root(tree);
//! assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
//! ```
//!
//! The full code is also available in the `salsa` example.
//!
//! ## Working with `InternWithDb` directly
//! If you don't want the trait, or macros, or if you just need more control about what happens during interning and
//! resolution, you can skip using [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa) and use
//! [`InternWithDb`] directly.
//!
//! Because `salsa` generates inherent methods (and not, for example, a trait implementation), we need information about
//! the used interning id either way. All that `as_interner` does is construct an instance of `InternWithDb` that uses
//! the generated methods to invoke `salsa`s interner. The implementation expands to
//! ```text
//! InternWithDb::new(
//! db,
//! |db, text| SourceId::new(db, text),
//! |db, id| id.text(db),
//! )
//! ```
//! but you may provide any function that doesn't capture.
#![cfg(feature = "salsa_2022_compat")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub use salsa;
use core::fmt;
use super::{InternKey, Interner, Resolver, TokenKey};
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
impl salsa::AsId for TokenKey {
    /// Converts this key to a `salsa::Id` carrying the same `u32` value.
    fn as_id(self) -> salsa::Id {
        salsa::Id::from_u32(self.into_u32())
    }

    /// Create an instance of the intern-key from an ID.
    ///
    /// # Panics
    /// Panics if the given `id` from `salsa` cannot be represented by a [`TokenKey`].
    fn from_id(id: salsa::Id) -> Self {
        TokenKey::try_from_u32(id.as_u32())
            .unwrap_or_else(|| panic!("`salsa::Id` is invalid for `TokenKey`'s keyspace: {id:?}"))
    }
}
/// Generates an extension trait `SalsaAsInterner` that lets you call `db.as_interner()` on your [`salsa::Database`] to
/// obtain a `cstree` compatible [`Interner`].
///
/// The `as_interner` method returns an instance of [`InternWithDb`] that uses the functions generated by `salsa` for
/// your Id type to perform interning and resolution.
///
/// If you have defined your interned text as
/// ```ignore
/// #[salsa::interned]
/// pub struct SourceId {
///     #[return_ref]
///     pub text: String,
/// }
/// ```
/// the syntax is
/// ```ignore
/// impl_cstree_interning_for_salsa!(impl Interning for YourDatabase => text as SourceId);
/// ```
/// where `text` is the name of the interned field.
/// Note that the use of `#[return_ref]` is required.
#[macro_export]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
macro_rules! impl_cstree_interning_for_salsa {
    (impl Interning for $db:ty => $name:ident as $id:ty) => {
        trait SalsaAsInterner {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id>;
        }

        // Implement the trait for the user-provided database type. Previously this
        // hard-coded `Database` as the implementing type, so the macro only compiled
        // when the user's database happened to be named `Database`; using `$db` makes
        // it work for any database type, as documented above.
        impl SalsaAsInterner for $db {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id> {
                ::cstree::interning::salsa_compat::InternWithDb::new(
                    self,
                    |db, text| <$id>::new(db, text),
                    |db, id| id.$name(db),
                )
            }
        }
    };
}
/// This type allows you to wrap access to a [`salsa::Database`] together with an interning and a lookup function, which
/// makes it implement [`Interner`] and [`Resolver`]. The [module documentation](self) shows how to use this with your
/// own database, or you can use [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa).
///
/// The interning traits are also implemented by `&InternWithDb`, as the `salsa` database supports interning through
/// shared references (see also [the `interning` module documentation](super)).
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub struct InternWithDb<'db, Db: salsa::Database, Id: salsa::interned::InternedId> {
    // The `salsa` database that performs the actual interning and owns the interned strings.
    db: &'db Db,
    // Interns `text` in `db`; e.g. `|db, text| SourceId::new(db, text)` (see module docs).
    intern: fn(&Db, text: String) -> Id,
    // Resolves an `Id` back to its text; e.g. `|db, id| id.text(db)` (see module docs).
    lookup: fn(&Db, Id) -> &str,
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> fmt::Debug for InternWithDb<'db, Db, Id> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The database and function pointers have no useful `Debug` output; print the type name only.
        f.write_str("InternWithDb")
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> InternWithDb<'db, Db, Id> {
    /// Create an [`Interner`] that works with `cstree` but uses the given `db` from `salsa`.
    /// To do this, you need to provide a function for interning new strings that creates the [`InternedId`] that you
    /// defined with [`#[salsa::interned]`](macro@salsa::interned), and a second one that resolves an Id using your
    /// database. See the [module documentation](self) for an example.
    ///
    /// [`InternedId`]: salsa::interned::InternedId
    pub fn new(db: &'db Db, intern: fn(&Db, text: String) -> Id, lookup: fn(&Db, Id) -> &str) -> Self {
        Self { db, intern, lookup }
    }
}
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for InternWithDb<'db, Db, Id> {
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        // Convert the `cstree` key back into the user's `salsa` intern Id, then look it up.
        let key = Id::from_id(key.as_id());
        // `lookup` returns `&str` directly, so resolution here always succeeds.
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for InternWithDb<'db, Db, Id> {
    // Interning through the `salsa` database cannot fail.
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        // `salsa`'s interning takes an owned `String`, so this allocates even for
        // already-interned text (see the module docs).
        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}
// `salsa` interns through a shared database reference, so the traits are also
// implemented for `&InternWithDb`, enabling `&mut &interner` in shared contexts.
impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for &InternWithDb<'db, Db, Id> {
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        let key = Id::from_id(key.as_id());
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for &InternWithDb<'db, Db, Id> {
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}

67
src/interning/traits.rs Normal file
View file

@ -0,0 +1,67 @@
use core::fmt;

use super::TokenKey;

/// Common interface for all intern keys via conversion to and from `u32`.
///
/// # Safety
/// Implementations must guarantee that keys can round-trip in both directions: going from `Self` to `u32` to `Self` and
/// going from `u32` to `Self` to `u32` must each yield the original value.
pub unsafe trait InternKey: Copy + Eq + fmt::Debug {
    /// Convert `self` into its raw representation.
    fn into_u32(self) -> u32;

    /// Try to reconstruct an intern key from its raw representation.
    /// Returns `None` if `key` is not a valid key.
    fn try_from_u32(key: u32) -> Option<Self>;
}
/// The read-only part of an interner.
/// Allows to perform lookups of intern keys to resolve them to their interned text.
pub trait Resolver<Key: InternKey = TokenKey> {
    /// Tries to resolve the given `key` and return its interned text.
    ///
    /// If `self` does not contain any text for `key`, `None` is returned.
    fn try_resolve(&self, key: Key) -> Option<&str>;

    /// Resolves `key` to its interned text.
    ///
    /// # Panics
    /// Panics if there is no text for `key`.
    ///
    /// Compatibility implementations for interners from other crates may also panic if `key` cannot be converted to the
    /// key type of the external interner. Please ensure you configure any external interners appropriately (for
    /// example by choosing an appropriately sized key type).
    fn resolve(&self, key: Key) -> &str {
        match self.try_resolve(key) {
            Some(text) => text,
            None => panic!("failed to resolve `{key:?}`"),
        }
    }
}
/// A full interner, which can intern new strings returning intern keys and also resolve intern keys to the interned
/// value.
///
/// **Note:** Because single-threaded interners may require mutable access, the methods on this trait take `&mut self`.
/// In order to use a multi- (or single)-threaded interner that allows access through a shared reference, it is
/// implemented for `&`[`MultiThreadedTokenInterner`](crate::interning::MultiThreadedTokenInterner), allowing it to be
/// used with a `&mut &MultiThreadTokenInterner`.
pub trait Interner<Key: InternKey = TokenKey>: Resolver<Key> {
    /// Represents possible ways in which interning may fail.
    /// For example, this might be running out of fresh intern keys, or failure to allocate sufficient space for a new
    /// value.
    type Error;

    /// Interns `text` and returns a new intern key for it.
    /// If `text` was already previously interned, it will not be used and the existing intern key for its value will be
    /// returned.
    fn try_get_or_intern(&mut self, text: &str) -> Result<Key, Self::Error>;

    /// Interns `text` and returns a new intern key for it.
    ///
    /// # Panics
    /// Panics if the internment process raises an [`Error`](Interner::Error).
    fn get_or_intern(&mut self, text: &str) -> Key {
        match self.try_get_or_intern(text) {
            Ok(key) => key,
            Err(_) => panic!("failed to intern `{text:?}`"),
        }
    }
}

View file

@ -2,7 +2,7 @@
//! "Traditional" abstract syntax trees (ASTs) usually contain different types of nodes which represent information
//! about the source text of a document and reduce this information to the minimal amount necessary to correctly
//! interpret it. In contrast, CSTs are lossless representations of the entire input where all tree nodes are
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`SyntaxKind`] field to determine the kind of
//! represented uniformly (i.e. the nodes are _untyped_), but include a [`RawSyntaxKind`] field to determine the kind of
//! node.
//! One of the big advantages of this representation is not only that it can recreate the original source exactly, but
//! also that it lends itself very well to the representation of _incomplete or erroneous_ trees and is thus very suited
@ -35,41 +35,385 @@
//! references. You can still `clone` to obtain an owned node, but you only pay that cost when you need to.
//!
//! ## Getting Started
//! The main entry points for constructing syntax trees are [`GreenNodeBuilder`] and [`SyntaxNode::new_root`] for green
//! and red trees respectively. See `examples/s_expressions.rs` for a guided tutorial to `cstree`.
//! If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using
//! concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs
//! to happen to go from input text to a `cstree` syntax tree:
//!
//! 1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression") that you want to
//! have in your syntax and implement [`Language`]
//!
//! 2. Create a [`GreenNodeBuilder`](build::GreenNodeBuilder) and call
//! [`start_node`](build::GreenNodeBuilder::start_node), [`token`](build::GreenNodeBuilder::token) and
//! [`finish_node`](build::GreenNodeBuilder::finish_node) from your parser
//!
//! 3. Call [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root) or
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) with the resulting
//! [`GreenNode`](green::GreenNode) to obtain a syntax tree that you can traverse
//!
//! Let's walk through the motions of parsing a (very) simple language into `cstree` syntax trees.
//! We'll just support addition and subtraction on integers, from which the user is allowed to construct a single,
//! compound expression. They will, however, be allowed to write nested expressions in parentheses, like `1 - (2 + 5)`.
//!
//! ### Defining the language
//!
//! First, we need to list the different part of our language's grammar.
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
//! representation.
//!
//! ```rust,ignore
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
//! #[repr(u16)]
//! enum SyntaxKind {
//! /* Tokens */
//! Int, // 42
//! Plus, // +
//! Minus, // -
//! LParen, // (
//! RParen, // )
//! /* Nodes */
//! Expr,
//! Root,
//! }
//! ```
//!
//! Most of these are tokens to lex the input string into, like numbers (`Int`) and operators (`Plus`, `Minus`).
//! We only really need one type of node; expressions.
//! Our syntax tree's root node will have the special kind `Root`, all other nodes will be
//! expressions containing a sequence of arithmetic operations potentially involving further, nested
//! expression nodes.
//!
//! To use our `SyntaxKind`s with `cstree`, we need to tell it how to convert it back to just a number (the
//! `#[repr(u16)]` that we added) by implementing the [`Language`] trait. We can also tell `cstree` about tokens that
//! always have the same text through the `static_text` method on the trait. This is useful for the operators and
//! parentheses, but not possible for numbers, since an integer token may be produced from the input `3`, but also from
//! other numbers like `7` or `12`. We implement `Language` on an empty type, just so we can give it a name.
//!
//! ```rust,ignore
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
//! pub struct Calculator;
//! impl Language for Calculator {
//! // The tokens and nodes we just defined
//! type Kind = SyntaxKind;
//!
//! fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
//! // This just needs to be the inverse of `kind_to_raw`, but could also
//! // be an `impl TryFrom<u16> for SyntaxKind` or any other conversion.
//! match raw.0 {
//! 0 => SyntaxKind::Int,
//! 1 => SyntaxKind::Plus,
//! 2 => SyntaxKind::Minus,
//! 3 => SyntaxKind::LParen,
//! 4 => SyntaxKind::RParen,
//! 5 => SyntaxKind::Expr,
//! 6 => SyntaxKind::Root,
//! n => panic!("Unknown raw syntax kind: {n}"),
//! }
//! }
//!
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
//! RawSyntaxKind(kind as u16)
//! }
//!
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
//! match kind {
//! SyntaxKind::Plus => Some("+"),
//! SyntaxKind::Minus => Some("-"),
//! SyntaxKind::LParen => Some("("),
//! SyntaxKind::RParen => Some(")"),
//! _ => None,
//! }
//! }
//! }
//! ```
//!
//! ### Parsing into a green tree
//! With that out of the way, we can start writing the parser for our expressions.
//! For the purposes of this introduction to `cstree`, I'll assume that there is a lexer that yields the following
//! tokens:
//!
//! ```rust,ignore
//! #[derive(Debug, PartialEq, Eq, Clone, Copy)]
//! pub enum Token<'input> {
//! // Note that number strings are not yet parsed into actual numbers,
//! // we just remember the slice of the input that contains their digits
//! Int(&'input str),
//! Plus,
//! Minus,
//! LParen,
//! RParen,
//! // A special token that indicates that we have reached the end of the file
//! EoF,
//! }
//! ```
//!
//! A simple lexer that yields such tokens is part of the full `readme` example, but we'll be busy enough with the
//! combination of `cstree` and the actual parser, which we define like this:
//!
//! ```rust,ignore
//! pub struct Parser<'input> {
//! // `Peekable` is a standard library iterator adapter that allows
//! // looking ahead at the next item without removing it from the iterator yet
//! lexer: Peekable<Lexer<'input>>,
//! builder: GreenNodeBuilder<'static, 'static, Calculator>,
//! }
//!
//! impl<'input> Parser<'input> {
//! pub fn new(input: &'input str) -> Self {
//! Self {
//! // we get `peekable` from implementing `Iterator` on `Lexer`
//! lexer: Lexer::new(input).peekable(),
//! builder: GreenNodeBuilder::new(),
//! }
//! }
//!
//! pub fn bump(&mut self) -> Option<Token<'input>> {
//! self.lexer.next()
//! }
//! }
//! ```
//!
//! In contrast to parsers that return abstract syntax trees, with `cstree` the syntax tree nodes
//! for all elements in the language grammar will have the same type: [`GreenNode`](green::GreenNode)
//! for the inner ("green") tree and [`SyntaxNode`](syntax::SyntaxNode) for the outer ("red") tree.
//! Different kinds of nodes (and tokens) are differentiated by their `SyntaxKind` tag, which we defined above.
//!
//! You can implement many types of parsers with `cstree`. To get a feel for how it works, consider
//! a typical recursive descent parser. With a more traditional AST, one would define different AST
//! structs for struct or function definitions, statements, expressions and so on. Inside the
//! parser, the components of any element, such as all fields of a struct or all statements inside a
//! function, are parsed first and then the parser wraps them in the matching AST type, which is
//! returned from the corresponding parser function.
//!
//! Because `cstree`'s syntax trees are untyped, there is no explicit AST representation that the
//! parser would build. Instead, parsing into a CST using the
//! [`GreenNodeBuilder`](build::GreenNodeBuilder) follows the source code more closely in that you
//! tell `cstree` about each new element you enter and all tokens that the parser consumes. So, for
//! example, to parse a struct definition the parser first "enters" the struct definition node, then
//! parses the `struct` keyword and type name, then parses each field, and finally "finishes"
//! parsing the struct node.
//!
//! The most trivial example is the root node for our parser, which just creates a root node
//! containing the whole expression (we could do without a specific root node if any expression was
//! a node, in particular if we wrapped integer literal tokens inside `Expr` nodes).
//!
//! ```rust,ignore
//! pub fn parse(&mut self) -> Result<(), String> {
//! self.builder.start_node(SyntaxKind::Root);
//! self.parse_expr()?;
//! self.builder.finish_node();
//! Ok(())
//! }
//! ```
//!
//! As there isn't a static AST type to return, the parser is very flexible as to what is part of a
//! node. In the previous example, if the user is adding a new field to the struct and has not yet
//! typed the field's type, the CST node for the struct doesn't care if there is no child node for
//! it. Similarly, if the user is deleting fields and the source code currently contains a leftover
//! field name, this additional identifier can be a part of the struct node without any
//! modifications to the syntax tree definition. This property is the key to why CSTs are such a
//! good fit as a lossless input representation, which necessitates the syntax tree to mirror the
//! user-specific layout of whitespace and comments around the AST items.
//!
//! In the parser for our simple expression language, we'll also have to deal with the fact that,
//! when we see a number the parser doesn't yet know whether there will be additional operations
//! following that number. That is, in the expression `1 + 2`, it can only know that it is parsing
//! a binary operation once it sees the `+`. The event-like model of building trees in `cstree`,
//! however, implies that when reaching the `+`, the parser would have to have already entered an
//! expression node in order for the whole input to be part of the expression.
//!
//! To get around this, `GreenNodeBuilder` provides the
//! [`checkpoint`](build::GreenNodeBuilder::checkpoint) method, which we can call to "remember" the
//! current position in the input. For example, we can create a checkpoint before the parser parses
//! the first `1`. Later, when it sees the following `+`, it can create an `Expr` node for the
//! whole expression using [`start_node_at`](build::GreenNodeBuilder::start_node_at):
//!
//! ```rust,ignore
//! fn parse_lhs(&mut self) -> Result<(), String> {
//! // An expression may start either with a number, or with an opening parenthesis that is
//! // the start of a parenthesized expression
//! let next_token = *self.lexer.peek().unwrap();
//! match next_token {
//! Token::Int(n) => {
//! self.bump();
//! self.builder.token(SyntaxKind::Int, n);
//! }
//! Token::LParen => {
//! // Wrap the grouped expression inside a node containing it and its parentheses
//! self.builder.start_node(SyntaxKind::Expr);
//! self.bump();
//! self.builder.static_token(SyntaxKind::LParen);
//! self.parse_expr()?; // Inner expression
//! if self.bump() != Some(Token::RParen) {
//! return Err("Missing ')'".to_string());
//! }
//! self.builder.static_token(SyntaxKind::RParen);
//! self.builder.finish_node();
//! }
//! Token::EoF => return Err("Unexpected end of file: expected expression".to_string()),
//! t => return Err(format!("Unexpected start of expression: '{t:?}'")),
//! }
//! Ok(())
//! }
//!
//! fn parse_expr(&mut self) -> Result<(), String> {
//! // Remember our current position
//! let before_expr = self.builder.checkpoint();
//!
//! // Parse the start of the expression
//! self.parse_lhs()?;
//!
//! // Check if the expression continues with `+ <more>` or `- <more>`
//! let Some(next_token) = self.lexer.peek() else {
//! return Ok(());
//! };
//! let op = match *next_token {
//! Token::Plus => SyntaxKind::Plus,
//! Token::Minus => SyntaxKind::Minus,
//! Token::RParen | Token::EoF => return Ok(()),
//! t => return Err(format!("Expected operator, found '{t:?}'")),
//! };
//!
//! // If so, retroactively wrap the (already parsed) LHS and the following RHS
//! // inside an `Expr` node
//! self.builder.start_node_at(before_expr, SyntaxKind::Expr);
//! self.bump();
//! self.builder.static_token(op);
//! self.parse_expr()?; // RHS
//! self.builder.finish_node();
//! Ok(())
//! }
//! ```
//!
//! ### Obtaining the parser result
//!
//! Our parser is now capable of parsing our little arithmetic language, but its methods don't
//! return anything. So how do we get our syntax tree out? The answer lies in
//! [`GreenNodeBuilder::finish`](build::GreenNodeBuilder::finish), which finally returns the tree
//! that we have painstakingly constructed.
//!
//! ```rust,ignore
//! impl Parser<'_> {
//! pub fn finish(mut self) -> (GreenNode, impl Interner) {
//! assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true));
//! let (tree, cache) = self.builder.finish();
//! (tree, cache.unwrap().into_interner().unwrap())
//! }
//! }
//! ```
//!
//! `finish` also returns the cache it used to deduplicate tree nodes and tokens, so you can re-use
//! it for parsing related inputs (e.g., different source files from the same crate may share a lot
//! of common function and type names that can be deduplicated). See `GreenNodeBuilder`'s
//! documentation for more information on this, in particular the `with_cache` and `from_cache`
//! methods. Most importantly for us, we can extract the [`Interner`](interning::Interner) that
//! contains the source text of the tree's tokens from the cache, which we need if we want to look
//! up things like variable names or the value of numbers for our calculator.
//!
//! To work with the syntax tree, you'll want to upgrade it to a [`SyntaxNode`](syntax::SyntaxNode)
//! using [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root). You can also use
//! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) to combine
//! tree and interner, which lets you directly retrieve source text and makes the nodes implement
//! `Display` and `Debug`. The same output can be produced from `SyntaxNode`s by calling the
//! `debug` or `display` method with a [`Resolver`](interning::Resolver). To visualize the whole
//! syntax tree, pass `true` for the `recursive` parameter on `debug`, or simply debug-print a
//! [`ResolvedNode`](syntax::ResolvedNode):
//!
//! ```rust,ignore
//! let input = "11 + 2-(5 + 4)";
//! let mut parser = Parser::new(input);
//! parser.parse().unwrap();
//! let (tree, interner) = parser.finish();
//! let root = SyntaxNode::<Calculator>::new_root_with_resolver(tree, interner);
//! dbg!(root);
//! ```
//!
//! ### Further examples
//! The parser we just built is available in full in the runnable `readme` example, which includes some additional code
//! to read expressions from the terminal and evaluate the parsed expressions - have it do a few calculations if you
//! like.
//! There are several more examples in the `examples/` folder in the repository.
//! A good starting point is the `s_expressions` example, which implements a parser for a small S-Expression language
//! with guiding comments.
//!
//! ## AST Layer
//! While `cstree` is built for concrete syntax trees, applications are quite easily able to work with either a CST or
//! an AST representation, or freely switch between them. To do so, use `cstree` to build syntax and underlying green
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs) and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs) (note that the latter file is automatically generated by a task).
//! tree and provide AST wrappers for your different kinds of nodes. An example of how this is done can be seen [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated.rs)
//! and [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/crates/syntax/src/ast/generated/nodes.rs)
//! (note that the latter file is automatically generated by a task using [`ungrammar`](https://crates.io/crates/ungrammar)).
#![forbid(missing_debug_implementations, unconditional_recursion)]
#![deny(unsafe_code, missing_docs, future_incompatible)]
#![deny(unsafe_code, future_incompatible)]
#![allow(unstable_name_collisions)] // strict provenance - must come after `future_incompatible` to take precedence
#![warn(missing_docs)]
// Docs.rs
#![doc(html_root_url = "https://docs.rs/cstree/0.12.0-rc.0")]
#![cfg_attr(doc_cfg, feature(doc_cfg))]
#[allow(unsafe_code)]
mod green;
pub mod green;
#[allow(unsafe_code)]
mod syntax;
pub mod syntax;
#[allow(unsafe_code)]
pub mod interning;
#[cfg(feature = "serialize")]
mod serde_impls;
#[allow(missing_docs)]
mod utility_types;
pub mod interning;
use std::fmt;
// Reexport types for working with strings.
pub use text_size::{TextLen, TextRange, TextSize};
/// `RawSyntaxKind` is a type tag for each token or node.
///
/// This is the compact, language-agnostic representation that `cstree` stores internally
/// (a plain `u16`). A [`Language`] implementation converts between it and the language's
/// own `Kind` type via `kind_from_raw` / `kind_to_raw`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawSyntaxKind(pub u16);
#[doc(inline)]
pub use crate::syntax::*;
pub use crate::{
green::{Checkpoint, GreenNode, GreenNodeBuilder, GreenNodeChildren, GreenToken, NodeCache, SyntaxKind},
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
};
pub use triomphe::Arc;
/// Typesafe representations of text ranges and sizes.
///
/// Re-exports the `text-size` types used throughout `cstree`, plus [`SyntaxText`](crate::syntax::SyntaxText).
pub mod text {
    pub use text_size::{TextLen, TextRange, TextSize};

    pub use crate::syntax::SyntaxText;
}
/// A tree builder for the construction of syntax trees.
///
/// Please refer to the documentation on [`GreenNodeBuilder`](build::GreenNodeBuilder) itself and the ["getting started"
/// section](../index.html#getting-started) from the top-level documentation for an introduction to how to build a
/// syntax tree.
pub mod build {
pub use crate::green::builder::{Checkpoint, GreenNodeBuilder, NodeCache};
}
/// A convenient collection of the most used parts of `cstree`.
pub mod prelude {
pub use crate::{
build::GreenNodeBuilder,
green::{GreenNode, GreenToken},
syntax::{SyntaxElement, SyntaxNode, SyntaxToken},
Language, RawSyntaxKind,
};
}
/// Types for syntax tree traversal / moving through trees.
pub mod traversal {
pub use crate::utility_types::{Direction, WalkEvent};
}
/// Utility types. It shouldn't be needed to reference these directly, but they are returned in several places in
/// `cstree` and may come in handy.
pub mod util {
pub use crate::utility_types::{NodeOrToken, TokenAtOffset};
}
/// Synchronization primitives.
pub mod sync {
    /// An atomically reference counted shared pointer.
    ///
    /// This is like [`Arc`](std::sync::Arc) in the standard library, but more efficient for how `cstree` stores
    /// syntax trees internally. This Arc does not support weak reference counting.
    ///
    /// Re-exported from the `triomphe` crate.
    pub use triomphe::Arc;
}
/// The `Language` trait is the bridge between the internal `cstree` representation and your
/// language's types.
@ -97,13 +441,13 @@ pub use triomphe::Arc;
/// impl cstree::Language for Lang {
/// type Kind = SyntaxKind;
///
/// fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
/// assert!(raw.0 <= __LAST as u16);
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// }
///
/// fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// cstree::SyntaxKind(kind as u16)
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
/// cstree::RawSyntaxKind(kind as u16)
/// }
///
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
@ -115,29 +459,34 @@ pub use triomphe::Arc;
/// }
/// }
/// ```
///
/// [`SyntaxNode`]: crate::syntax::SyntaxNode
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
/// A type that represents what items in your Language can be.
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ...
type Kind: Sized + Clone + Copy + fmt::Debug;
/// Construct a semantic item kind from the compact representation.
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind;
/// Convert a semantic item kind into a more compact representation.
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind;
/// Fixed text for a particular syntax kind.
///
/// Implement for kinds that will only ever represent the same text, such as punctuation (like a
/// semicolon), keywords (like `fn`), or operators (like `<=`).
///
/// Indicating tokens that have a `static_text` this way allows `cstree` to store them more efficiently, which makes
/// it faster to add them to a syntax tree and to look up their text. Since there can often be many occurrences
/// of these tokens inside a file, doing so will improve the performance of using `cstree`.
fn static_text(kind: Self::Kind) -> Option<&'static str>;
}
#[doc(hidden)]
#[allow(unsafe_code, unused)]
pub mod testing {
pub use crate::*;
pub fn parse<L: Language, I>(_b: &mut super::GreenNodeBuilder<L, I>, _s: &str) {}
pub use crate::prelude::*;
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
@ -160,13 +509,13 @@ pub mod testing {
impl Language for TestLang {
type Kind = TestSyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
SyntaxKind(kind as u16)
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16)
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {

View file

@ -1,8 +1,12 @@
//! Serialization and Deserialization for syntax trees.
use crate::{
interning::{IntoResolver, Resolver},
GreenNodeBuilder, Language, NodeOrToken, ResolvedNode, SyntaxKind, SyntaxNode, WalkEvent,
build::GreenNodeBuilder,
interning::{Resolver, TokenKey},
syntax::{ResolvedNode, SyntaxNode},
traversal::WalkEvent,
util::NodeOrToken,
Language, RawSyntaxKind,
};
use serde::{
de::{Error, SeqAccess, Visitor},
@ -77,8 +81,8 @@ enum Event<'text> {
/// The second parameter indicates if this node needs data.
/// If the boolean is true, the next element inside the data list
/// must be attached to this node.
EnterNode(SyntaxKind, bool),
Token(SyntaxKind, &'text str),
EnterNode(RawSyntaxKind, bool),
Token(RawSyntaxKind, &'text str),
LeaveNode,
}
@ -97,7 +101,7 @@ pub(crate) struct SerializeWithData<'node, 'resolver, L: Language, D: 'static, R
impl<L, D, R> Serialize for SerializeWithData<'_, '_, L, D, R>
where
L: Language,
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
D: Serialize,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
@ -112,7 +116,7 @@ where
impl<L, D, R> Serialize for SerializeWithResolver<'_, '_, L, D, R>
where
L: Language,
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@ -192,8 +196,7 @@ where
}
let (tree, cache) = builder.finish();
let tree =
ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap().into_resolver());
let tree = ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap());
Ok((tree, data_indices))
}
}
@ -236,7 +239,7 @@ where
}
}
impl Serialize for SyntaxKind {
impl Serialize for RawSyntaxKind {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
@ -245,7 +248,7 @@ impl Serialize for SyntaxKind {
}
}
impl<'de> Deserialize<'de> for SyntaxKind {
impl<'de> Deserialize<'de> for RawSyntaxKind {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,

View file

@ -1,10 +1,14 @@
use std::{fmt, sync::atomic::AtomicU32};
use lasso::Resolver;
use text_size::{TextRange, TextSize};
use super::*;
use crate::{green::GreenElementRef, Language, NodeOrToken, SyntaxKind, TokenAtOffset};
use crate::{
green::GreenElementRef,
interning::{Resolver, TokenKey},
util::{NodeOrToken, TokenAtOffset},
Language, RawSyntaxKind,
};
/// An element of the tree, which can be either a node or a token.
pub type SyntaxElement<L, D = ()> = NodeOrToken<SyntaxNode<L, D>, SyntaxToken<L, D>>;
@ -27,7 +31,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElement.html#method.write_display).
pub fn display<R>(&self, resolver: &R) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.display(resolver),
@ -38,7 +42,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.write_display(resolver, target),
@ -53,7 +57,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElement.html#method.write_debug).
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.debug(resolver, recursive),
@ -66,7 +70,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
/// Otherwise, only this element's kind and range are written.
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
@ -105,7 +109,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
/// To avoid allocating for every element, see [`write_display`](type.SyntaxElementRef.html#method.write_display).
pub fn display<R>(&self, resolver: &R) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.display(resolver),
@ -116,7 +120,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
/// Writes this element's [`Display`](fmt::Display) representation into the given `target`.
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.write_display(resolver, target),
@ -131,7 +135,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
/// To avoid allocating for every element, see [`write_debug`](type.SyntaxElementRef.html#method.write_debug).
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.debug(resolver, recursive),
@ -144,7 +148,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
/// Otherwise, only this element's kind and range are written.
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
match self {
NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive),
@ -162,8 +166,8 @@ impl<L: Language, D> SyntaxElement<L, D> {
ref_count: *mut AtomicU32,
) -> SyntaxElement<L, D> {
match element {
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index as u32, offset, ref_count).into(),
NodeOrToken::Token(_) => SyntaxToken::new(parent, index as u32, offset).into(),
NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index, offset, ref_count).into(),
NodeOrToken::Token(_) => SyntaxToken::new(parent, index, offset).into(),
}
}
@ -178,7 +182,7 @@ impl<L: Language, D> SyntaxElement<L, D> {
/// The internal representation of the kind of this element.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.syntax_kind(),
NodeOrToken::Token(it) => it.syntax_kind(),
@ -261,7 +265,7 @@ impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> {
/// The internal representation of the kind of this element.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.syntax_kind(),
NodeOrToken::Token(it) => it.syntax_kind(),

View file

@ -4,7 +4,11 @@ use std::iter::FusedIterator;
use text_size::TextSize;
use crate::{green::GreenElementRef, GreenNodeChildren, Language, SyntaxElementRef, SyntaxNode};
use crate::{
green::{GreenElementRef, GreenNodeChildren},
syntax::{SyntaxElementRef, SyntaxNode},
Language,
};
#[derive(Clone, Debug)]
struct Iter<'n> {

View file

@ -36,6 +36,7 @@ pub use text::SyntaxText;
#[cfg(test)]
mod tests {
use super::*;
use crate::testing::*;
#[test]

View file

@ -2,9 +2,12 @@ use super::*;
#[cfg(feature = "serialize")]
use crate::serde_impls::{SerializeWithData, SerializeWithResolver};
use crate::{
green::{GreenElementRef, SyntaxKind},
interning::Resolver,
*,
green::{GreenElementRef, GreenNode},
interning::{Resolver, TokenKey},
text::*,
traversal::*,
util::*,
Language, RawSyntaxKind,
};
use parking_lot::RwLock;
use std::{
@ -39,7 +42,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
/// Otherwise, only this node's kind and range are written.
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
if recursive {
let mut level = 0;
@ -71,7 +74,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
#[inline]
pub fn debug<R>(&self, resolver: &R, recursive: bool) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
// NOTE: `fmt::Write` methods on `String` never fail
let mut res = String::new();
@ -82,7 +85,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
/// Writes this node's [`Display`](fmt::Display) representation into the given `target`.
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
self.preorder_with_tokens()
.filter_map(|event| match event {
@ -98,7 +101,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
#[inline]
pub fn display<R>(&self, resolver: &R) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
// NOTE: `fmt::Write` methods on `String` never fail
let mut res = String::new();
@ -107,21 +110,22 @@ impl<L: Language, D> SyntaxNode<L, D> {
}
/// If there is a resolver associated with this tree, returns it.
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
match &self.root().data().kind {
Kind::Root(_, resolver) => resolver.as_ref(),
_ => unreachable!(),
}
}
/// Turns this node into a [`ResolvedNode`], but only if there is a resolver associated with this tree.
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode), but only if there is a resolver associated
/// with this tree.
#[inline]
pub fn try_resolved(&self) -> Option<&ResolvedNode<L, D>> {
// safety: we only coerce if `resolver` exists
self.resolver().map(|_| unsafe { ResolvedNode::coerce_ref(self) })
}
/// Turns this node into a [`ResolvedNode`].
/// Turns this node into a [`ResolvedNode`](crate::syntax::ResolvedNode).
/// # Panics
/// If there is no resolver associated with this tree.
#[inline]
@ -233,7 +237,7 @@ impl<L: Language, D> Hash for SyntaxNode<L, D> {
}
enum Kind<L: Language, D: 'static> {
Root(GreenNode, Option<StdArc<dyn Resolver>>),
Root(GreenNode, Option<StdArc<dyn Resolver<TokenKey>>>),
Child {
parent: SyntaxNode<L, D>,
index: u32,
@ -300,7 +304,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
Self { data }
}
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver>>) -> Self {
fn make_new_root(green: GreenNode, resolver: Option<StdArc<dyn Resolver<TokenKey>>>) -> Self {
let ref_count = Box::new(AtomicU32::new(1));
let n_children = green.children().count();
let data = NodeData::new(
@ -328,6 +332,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
/// # Example
/// ```
/// # use cstree::testing::*;
/// use cstree::syntax::ResolvedNode;
///
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// builder.start_node(Root);
/// builder.token(Identifier, "content");
@ -342,8 +348,8 @@ impl<L: Language, D> SyntaxNode<L, D> {
/// assert_eq!(root.text(), "content");
/// ```
#[inline]
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> ResolvedNode<L, D> {
let ptr: StdArc<dyn Resolver> = StdArc::new(resolver);
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> ResolvedNode<L, D> {
let ptr: StdArc<dyn Resolver<TokenKey>> = StdArc::new(resolver);
ResolvedNode {
syntax: SyntaxNode::make_new_root(green, Some(ptr)),
}
@ -517,7 +523,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
/// The internal representation of the kind of this node.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
self.green().kind()
}
@ -543,7 +549,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
#[inline]
pub fn resolve_text<'n, 'i, I>(&'n self, resolver: &'i I) -> SyntaxText<'n, 'i, I, L, D>
where
I: Resolver + ?Sized,
I: Resolver<TokenKey> + ?Sized,
{
SyntaxText::new(self, resolver)
}
@ -911,7 +917,7 @@ where
/// including the data and by using an external resolver.
pub fn as_serialize_with_data_with_resolver<'node>(
&'node self,
resolver: &'node impl Resolver,
resolver: &'node impl Resolver<TokenKey>,
) -> impl serde::Serialize + 'node
where
D: serde::Serialize,
@ -923,7 +929,7 @@ where
/// which uses the given resolver instead of the resolver inside the tree.
pub fn as_serialize_with_resolver<'node>(
&'node self,
resolver: &'node impl Resolver,
resolver: &'node impl Resolver<TokenKey>,
) -> impl serde::Serialize + 'node {
SerializeWithResolver { node: self, resolver }
}

View file

@ -9,12 +9,15 @@ use std::{
sync::Arc as StdArc,
};
use lasso::Resolver;
use text_size::{TextRange, TextSize};
use crate::{
Direction, GreenNode, Language, NodeOrToken, SyntaxElementRef, SyntaxKind, SyntaxNode, SyntaxText, SyntaxToken,
TokenAtOffset, WalkEvent,
green::GreenNode,
interning::{Resolver, TokenKey},
syntax::*,
traversal::*,
util::*,
Language, RawSyntaxKind,
};
/// Syntax tree node that is guaranteed to belong to a tree that contains an associated
@ -109,7 +112,7 @@ impl<L: Language, D> DerefMut for ResolvedToken<L, D> {
/// An element of the tree that is guaranteed to belong to a tree that contains an associated
/// [`Resolver`](lasso::Resolver), can be either a node or a token.
/// # See also
/// [`SyntaxElement`](crate::SyntaxElement)
/// [`SyntaxElement`](crate::syntax::SyntaxElement)
pub type ResolvedElement<L, D = ()> = NodeOrToken<ResolvedNode<L, D>, ResolvedToken<L, D>>;
impl<L: Language, D> From<ResolvedNode<L, D>> for ResolvedElement<L, D> {
@ -126,7 +129,7 @@ impl<L: Language, D> From<ResolvedToken<L, D>> for ResolvedElement<L, D> {
impl<L: Language, D> ResolvedElement<L, D> {
#[allow(missing_docs)]
pub fn display(&self, resolver: &impl Resolver) -> String {
pub fn display(&self, resolver: &impl Resolver<TokenKey>) -> String {
match self {
NodeOrToken::Node(it) => it.display(resolver),
NodeOrToken::Token(it) => it.display(resolver),
@ -177,7 +180,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
/// source text covered by this node, i.e. the combined text of all token leafs of the subtree
/// originating in this node.
#[inline]
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver, L, D> {
pub fn text(&self) -> SyntaxText<'_, '_, dyn Resolver<TokenKey>, L, D> {
SyntaxText::new(self, &**self.resolver())
}
}
@ -266,13 +269,13 @@ macro_rules! forward_node {
impl<L: Language, D> ResolvedNode<L, D> {
/// Returns the [`Resolver`] associated with this tree.
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
self.syntax.resolver().unwrap()
}
/// See [`SyntaxNode::new_root_with_resolver`].
#[inline]
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver + 'static) -> Self {
pub fn new_root_with_resolver(green: GreenNode, resolver: impl Resolver<TokenKey> + 'static) -> Self {
SyntaxNode::new_root_with_resolver(green, resolver)
}
@ -498,7 +501,7 @@ impl<L: Language, D> ResolvedNode<L, D> {
impl<L: Language, D> ResolvedToken<L, D> {
/// Returns the [`Resolver`] associated with this tree.
pub fn resolver(&self) -> &StdArc<dyn Resolver> {
pub fn resolver(&self) -> &StdArc<dyn Resolver<TokenKey>> {
self.syntax.resolver().unwrap()
}
@ -575,7 +578,7 @@ impl<L: Language, D> ResolvedElement<L, D> {
/// The internal representation of the kind of this element.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.syntax_kind(),
NodeOrToken::Token(it) => it.syntax_kind(),
@ -658,7 +661,7 @@ impl<'a, L: Language, D> ResolvedElementRef<'a, L, D> {
/// The internal representation of the kind of this element.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
match self {
NodeOrToken::Node(it) => it.syntax_kind(),
NodeOrToken::Token(it) => it.syntax_kind(),

View file

@ -2,7 +2,12 @@
use std::fmt;
use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, TextSize};
use crate::{
interning::{Resolver, TokenKey},
syntax::{SyntaxNode, SyntaxToken},
text::{TextRange, TextSize},
Language,
};
/// An efficient representation of the text that is covered by a [`SyntaxNode`], i.e. the combined
/// source text of all tokens that are descendants of the node.
@ -14,7 +19,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
/// # Example
/// ```
/// # use cstree::testing::*;
/// # use cstree::interning::IntoResolver;
/// # use cstree::syntax::ResolvedNode;
/// #
/// fn parse_float_literal(s: &str) -> ResolvedNode<MyLanguage> {
/// // parsing...
@ -23,7 +28,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
/// # builder.token(Float, s);
/// # builder.finish_node();
/// # let (root, cache) = builder.finish();
/// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
/// # let resolver = cache.unwrap().into_interner().unwrap();
/// # SyntaxNode::new_root_with_resolver(root, resolver)
/// }
/// let float_node = parse_float_literal("2.748E2");
@ -42,7 +47,7 @@ pub struct SyntaxText<'n, 'i, I: ?Sized, L: Language, D: 'static = ()> {
resolver: &'i I,
}
impl<'n, 'i, I: Resolver + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
impl<'n, 'i, I: Resolver<TokenKey> + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
pub(crate) fn new(node: &'n SyntaxNode<L, D>, resolver: &'i I) -> Self {
let range = node.text_range();
SyntaxText { node, range, resolver }
@ -203,25 +208,25 @@ fn found<T>(res: Result<(), T>) -> Option<T> {
}
}
impl<I: Resolver + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.to_string(), f)
}
}
impl<I: Resolver + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
}
}
impl<I: Resolver + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
fn from(text: SyntaxText<'_, '_, I, L, D>) -> String {
text.to_string()
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
fn eq(&self, mut rhs: &str) -> bool {
self.try_for_each_chunk(|chunk| {
if !rhs.starts_with(chunk) {
@ -235,19 +240,19 @@ impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_,
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
rhs == self
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
fn eq(&self, rhs: &&str) -> bool {
self == *rhs
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
rhs == self
}
@ -258,8 +263,8 @@ impl<'n1, 'i1, 'n2, 'i2, I1, I2, L1, L2, D1, D2> PartialEq<SyntaxText<'n2, 'i2,
where
L1: Language,
L2: Language,
I1: Resolver + ?Sized,
I2: Resolver + ?Sized,
I1: Resolver<TokenKey> + ?Sized,
I2: Resolver<TokenKey> + ?Sized,
{
fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool {
if self.range.len() != other.range.len() {
@ -282,8 +287,8 @@ fn zip_texts<'it1, 'it2, It1, It2, I1, I2, L1, L2, D1, D2>(
where
It1: Iterator<Item = (&'it1 SyntaxToken<L1, D1>, TextRange)>,
It2: Iterator<Item = (&'it2 SyntaxToken<L2, D2>, TextRange)>,
I1: Resolver + ?Sized,
I2: Resolver + ?Sized,
I1: Resolver<TokenKey> + ?Sized,
I2: Resolver<TokenKey> + ?Sized,
D1: 'static,
D2: 'static,
L1: Language + 'it1,
@ -309,12 +314,12 @@ where
}
}
impl<I: Resolver + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
impl<I: Resolver<TokenKey> + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
mod private {
use std::ops;
use crate::{TextRange, TextSize};
use crate::text::{TextRange, TextSize};
pub trait SyntaxTextRange {
fn start(&self) -> Option<TextSize>;
@ -374,27 +379,27 @@ mod private {
#[cfg(test)]
mod tests {
use crate::{green::SyntaxKind, GreenNodeBuilder};
use crate::{build::GreenNodeBuilder, RawSyntaxKind};
use super::*;
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum TestLang {}
impl Language for TestLang {
type Kind = SyntaxKind;
type Kind = RawSyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
raw
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
kind
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {
if kind == SyntaxKind(1) {
if kind == RawSyntaxKind(1) {
Some("{")
} else if kind == SyntaxKind(2) {
} else if kind == RawSyntaxKind(2) {
Some("}")
} else {
None
@ -402,16 +407,16 @@ mod tests {
}
}
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver) {
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver<TokenKey>) {
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
builder.start_node(SyntaxKind(62));
builder.start_node(RawSyntaxKind(62));
for &chunk in chunks.iter() {
let kind = match chunk {
"{" => 1,
"}" => 2,
_ => 3,
};
builder.token(SyntaxKind(kind), chunk);
builder.token(RawSyntaxKind(kind), chunk);
}
builder.finish_node();
let (node, cache) = builder.finish();

View file

@ -5,11 +5,15 @@ use std::{
sync::Arc as StdArc,
};
use lasso::Resolver;
use text_size::{TextRange, TextSize};
use super::*;
use crate::{interning::Key, Direction, GreenNode, GreenToken, Language, SyntaxKind};
use crate::{
green::{GreenNode, GreenToken},
interning::{Resolver, TokenKey},
traversal::Direction,
Language, RawSyntaxKind,
};
/// Syntax tree token.
#[derive(Debug)]
@ -49,7 +53,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// Writes this token's [`Debug`](fmt::Debug) representation into the given `target`.
pub fn write_debug<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
write!(target, "{:?}@{:?}", self.kind(), self.text_range())?;
let text = self.resolve_text(resolver);
@ -72,7 +76,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
#[inline]
pub fn debug<R>(&self, resolver: &R) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
// NOTE: `fmt::Write` methods on `String` never fail
let mut res = String::new();
@ -84,7 +88,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
#[inline]
pub fn write_display<R>(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
write!(target, "{}", self.resolve_text(resolver))
}
@ -95,25 +99,26 @@ impl<L: Language, D> SyntaxToken<L, D> {
#[inline]
pub fn display<R>(&self, resolver: &R) -> String
where
R: Resolver + ?Sized,
R: Resolver<TokenKey> + ?Sized,
{
self.resolve_text(resolver).to_string()
}
/// If there is a resolver associated with this tree, returns it.
#[inline]
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver>> {
pub fn resolver(&self) -> Option<&StdArc<dyn Resolver<TokenKey>>> {
self.parent.resolver()
}
/// Turns this token into a [`ResolvedToken`], but only if there is a resolver associated with this tree.
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken), but only if there is a resolver
/// associated with this tree.
#[inline]
pub fn try_resolved(&self) -> Option<&ResolvedToken<L, D>> {
// safety: we only coerce if `resolver` exists
self.resolver().map(|_| unsafe { ResolvedToken::coerce_ref(self) })
}
/// Turns this token into a [`ResolvedToken`].
/// Turns this token into a [`ResolvedToken`](crate::syntax::ResolvedToken).
/// # Panics
/// If there is no resolver associated with this tree.
#[inline]
@ -153,7 +158,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// The internal representation of the kind of this token.
#[inline]
pub fn syntax_kind(&self) -> SyntaxKind {
pub fn syntax_kind(&self) -> RawSyntaxKind {
self.green().kind()
}
@ -176,7 +181,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
#[inline]
pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str
where
I: Resolver + ?Sized,
I: Resolver<TokenKey> + ?Sized,
{
// one of the two must be present upon construction
self.static_text().or_else(|| self.green().text(resolver)).unwrap()
@ -191,6 +196,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
///
/// ```
/// # use cstree::testing::*;
/// # use cstree::build::*;
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # builder.start_node(Root);
/// # builder.token(Identifier, "x");
@ -278,18 +284,18 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// implementation by re-using the interner in both.
/// ```
/// # use cstree::testing::*;
/// use cstree::interning::{new_interner, Hasher, Key, Rodeo};
/// use cstree::interning::{new_interner, TokenInterner, TokenKey};
/// struct TypeTable {
/// // ...
/// }
/// impl TypeTable {
/// fn type_of(&self, ident: Key) -> &str {
/// fn type_of(&self, ident: TokenKey) -> &str {
/// // ...
/// # ""
/// }
/// }
/// # struct State {
/// # interner: Rodeo,
/// # interner: TokenInterner,
/// # type_table: TypeTable,
/// # }
/// let interner = new_interner();
@ -297,7 +303,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// interner,
/// type_table: TypeTable{ /* stuff */},
/// };
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> =
/// let mut builder: GreenNodeBuilder<MyLanguage, TokenInterner> =
/// GreenNodeBuilder::with_interner(&mut state.interner);
/// # let input = "";
/// # builder.start_node(Root);
@ -315,7 +321,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// let typ = type_table.type_of(ident.text_key().unwrap());
/// ```
#[inline]
pub fn text_key(&self) -> Option<Key> {
pub fn text_key(&self) -> Option<TokenKey> {
self.green().text_key()
}

View file

@ -109,7 +109,7 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
fn deref(&self) -> &T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
MaybeOwned::Borrowed(it) => it,
}
}
}
@ -118,7 +118,7 @@ impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
fn deref_mut(&mut self) -> &mut T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
MaybeOwned::Borrowed(it) => it,
}
}
}

View file

@ -1,6 +1,10 @@
use super::*;
use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind, TextRange};
use lasso::{Resolver, Rodeo};
use cstree::{
build::{GreenNodeBuilder, NodeCache},
interning::{new_interner, Resolver},
text::TextRange,
RawSyntaxKind,
};
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<D>, impl Resolver) {
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
@ -31,20 +35,20 @@ fn tree_with_eq_tokens() -> Element<'static> {
fn create() {
let tree = two_level_tree();
let (tree, resolver) = build_tree::<()>(&tree);
assert_eq!(tree.syntax_kind(), SyntaxKind(0));
assert_eq!(tree.kind(), SyntaxKind(0));
assert_eq!(tree.syntax_kind(), RawSyntaxKind(0));
assert_eq!(tree.kind(), RawSyntaxKind(0));
{
let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap();
let leaf1_0 = leaf1_0.into_token().unwrap();
assert_eq!(leaf1_0.syntax_kind(), SyntaxKind(5));
assert_eq!(leaf1_0.kind(), SyntaxKind(5));
assert_eq!(leaf1_0.syntax_kind(), RawSyntaxKind(5));
assert_eq!(leaf1_0.kind(), RawSyntaxKind(5));
assert_eq!(leaf1_0.resolve_text(&resolver), "1.0");
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
}
{
let node2 = tree.children().nth(2).unwrap();
assert_eq!(node2.syntax_kind(), SyntaxKind(6));
assert_eq!(node2.kind(), SyntaxKind(6));
assert_eq!(node2.syntax_kind(), RawSyntaxKind(6));
assert_eq!(node2.kind(), RawSyntaxKind(6));
assert_eq!(node2.children_with_tokens().count(), 3);
assert_eq!(node2.resolve_text(&resolver), "2.02.12.2");
}
@ -54,7 +58,7 @@ fn create() {
fn token_text_eq() {
let tree = tree_with_eq_tokens();
let (tree, _) = build_tree::<()>(&tree);
assert_eq!(tree.kind(), SyntaxKind(0));
assert_eq!(tree.kind(), RawSyntaxKind(0));
let leaf0_0 = tree.children().next().unwrap().children_with_tokens().next().unwrap();
let leaf0_0 = leaf0_0.into_token().unwrap();
@ -115,7 +119,7 @@ fn data() {
#[test]
fn with_interner() {
let mut interner = Rodeo::new();
let mut interner = new_interner();
let mut cache = NodeCache::with_interner(&mut interner);
let tree = two_level_tree();
let tree = build_tree_with_cache(&tree, &mut cache);
@ -135,7 +139,7 @@ fn with_interner() {
#[test]
fn inline_resolver() {
let mut interner = Rodeo::new();
let mut interner = new_interner();
let mut cache = NodeCache::with_interner(&mut interner);
let tree = two_level_tree();
let tree = build_tree_with_cache(&tree, &mut cache);
@ -146,7 +150,7 @@ fn inline_resolver() {
assert_eq!(leaf1_0.text(), "1.0");
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
assert_eq!(format!("{}", leaf1_0), leaf1_0.text());
assert_eq!(format!("{:?}", leaf1_0), "SyntaxKind(5)@6..9 \"1.0\"");
assert_eq!(format!("{:?}", leaf1_0), "RawSyntaxKind(5)@6..9 \"1.0\"");
}
{
let node2 = tree.children().nth(2).unwrap();
@ -154,13 +158,13 @@ fn inline_resolver() {
let resolver = node2.resolver();
assert_eq!(node2.resolve_text(resolver.as_ref()), node2.text());
assert_eq!(format!("{}", node2).as_str(), node2.text());
assert_eq!(format!("{:?}", node2), "SyntaxKind(6)@9..18");
assert_eq!(format!("{:?}", node2), "RawSyntaxKind(6)@9..18");
assert_eq!(
format!("{:#?}", node2),
r#"SyntaxKind(6)@9..18
SyntaxKind(7)@9..12 "2.0"
SyntaxKind(8)@12..15 "2.1"
SyntaxKind(9)@15..18 "2.2"
r#"RawSyntaxKind(6)@9..18
RawSyntaxKind(7)@9..12 "2.0"
RawSyntaxKind(8)@12..15 "2.1"
RawSyntaxKind(9)@15..18 "2.2"
"#
);
}
@ -175,7 +179,7 @@ fn assert_debug_display() {
f::<ResolvedToken>();
f::<ResolvedElement>();
f::<ResolvedElementRef<'static>>();
f::<cstree::NodeOrToken<String, u128>>();
f::<cstree::util::NodeOrToken<String, u128>>();
fn dbg<T: fmt::Debug>() {}
dbg::<GreenNodeBuilder<'static, 'static, TestLang>>();

View file

@ -4,18 +4,22 @@ mod sendsync;
#[cfg(feature = "serialize")]
mod serde;
use cstree::{GreenNode, GreenNodeBuilder, Language, NodeCache, SyntaxKind};
use lasso::Interner;
use cstree::{
build::{GreenNodeBuilder, NodeCache},
green::GreenNode,
interning::Interner,
Language, RawSyntaxKind,
};
pub type SyntaxNode<D = ()> = cstree::SyntaxNode<TestLang, D>;
pub type SyntaxToken<D = ()> = cstree::SyntaxToken<TestLang, D>;
pub type SyntaxElement<D = ()> = cstree::SyntaxElement<TestLang, D>;
pub type SyntaxElementRef<'a, D = ()> = cstree::SyntaxElementRef<'a, TestLang, D>;
pub type SyntaxNode<D = ()> = cstree::syntax::SyntaxNode<TestLang, D>;
pub type SyntaxToken<D = ()> = cstree::syntax::SyntaxToken<TestLang, D>;
pub type SyntaxElement<D = ()> = cstree::syntax::SyntaxElement<TestLang, D>;
pub type SyntaxElementRef<'a, D = ()> = cstree::syntax::SyntaxElementRef<'a, TestLang, D>;
pub type ResolvedNode<D = ()> = cstree::ResolvedNode<TestLang, D>;
pub type ResolvedToken<D = ()> = cstree::ResolvedToken<TestLang, D>;
pub type ResolvedElement<D = ()> = cstree::ResolvedElement<TestLang, D>;
pub type ResolvedElementRef<'a, D = ()> = cstree::ResolvedElementRef<'a, TestLang, D>;
pub type ResolvedNode<D = ()> = cstree::syntax::ResolvedNode<TestLang, D>;
pub type ResolvedToken<D = ()> = cstree::syntax::ResolvedToken<TestLang, D>;
pub type ResolvedElement<D = ()> = cstree::syntax::ResolvedElement<TestLang, D>;
pub type ResolvedElementRef<'a, D = ()> = cstree::syntax::ResolvedElementRef<'a, TestLang, D>;
#[derive(Debug)]
pub enum Element<'s> {
@ -26,13 +30,13 @@ pub enum Element<'s> {
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum TestLang {}
impl Language for TestLang {
type Kind = SyntaxKind;
type Kind = RawSyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
raw
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
kind
}
@ -41,7 +45,7 @@ impl Language for TestLang {
}
}
pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
pub fn build_tree_with_cache<I>(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode
where
I: Interner,
{
@ -52,25 +56,21 @@ where
node
}
pub fn build_recursive<'c, 'i, L, I>(
root: &Element<'_>,
builder: &mut GreenNodeBuilder<'c, 'i, L, I>,
mut from: u16,
) -> u16
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
where
L: Language<Kind = SyntaxKind>,
L: Language<Kind = RawSyntaxKind>,
I: Interner,
{
match root {
Element::Node(children) => {
builder.start_node(SyntaxKind(from));
builder.start_node(RawSyntaxKind(from));
for child in children {
from = build_recursive(child, builder, from + 1);
}
builder.finish_node();
}
Element::Token(text) => {
builder.token(SyntaxKind(from), *text);
builder.token(RawSyntaxKind(from), text);
}
}
from

View file

@ -3,7 +3,7 @@ fn empty_tree_arc() {
// this test is not here for the test itself, but to run it through MIRI, who complained about out-of-bound
// `ThinArc` pointers for a root `GreenNode` with no children
use cstree::*;
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
@ -16,13 +16,13 @@ fn empty_tree_arc() {
// ...
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
cstree::SyntaxKind(kind as u16)
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
cstree::RawSyntaxKind(kind as u16)
}
fn static_text(_kind: Self::Kind) -> Option<&'static str> {

View file

@ -4,13 +4,29 @@ use crossbeam_utils::thread::scope;
use std::{thread, time::Duration};
use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang};
use cstree::{interning::IntoResolver, GreenNodeBuilder};
use cstree::build::GreenNodeBuilder;
// Excercise the multi-threaded interner when the corresponding feature is enabled.
#[cfg(feature = "multi_threaded_interning")]
use cstree::interning::{new_threaded_interner, MultiThreadedTokenInterner};
#[cfg(not(feature = "multi_threaded_interning"))]
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang> {
GreenNodeBuilder::new()
}
#[cfg(feature = "multi_threaded_interning")]
fn get_builder() -> GreenNodeBuilder<'static, 'static, TestLang, MultiThreadedTokenInterner> {
let interner = new_threaded_interner();
GreenNodeBuilder::from_interner(interner)
}
fn build_tree<D>(root: &Element<'_>) -> ResolvedNode<D> {
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
let mut builder = get_builder();
build_recursive(root, &mut builder, 0);
let (node, cache) = builder.finish();
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
}
fn two_level_tree() -> Element<'static> {

View file

@ -2,8 +2,9 @@ use crate::{build_recursive, build_tree_with_cache, ResolvedNode};
use super::{Element, SyntaxNode, TestLang};
use cstree::{
interning::{new_interner, IntoResolver},
GreenNodeBuilder, NodeCache, NodeOrToken,
build::{GreenNodeBuilder, NodeCache},
interning::new_interner,
util::NodeOrToken,
};
use serde_test::Token;
use std::fmt;
@ -227,7 +228,7 @@ fn build_tree(root: Element<'_>) -> ResolvedNode<String> {
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
build_recursive(&root, &mut builder, 0);
let (node, cache) = builder.finish();
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap())
}
fn attach_data(node: &SyntaxNode<String>) {