1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

Performance Improvements (#43)

- add `Language::static_text` and optimize static tokens
 - re-use existing `ThinArc`s in `GreenNodeBuilder::finish_node`
 - replace `*mut` in `SyntaxNode` with `NonNull`
 - add CHANGELOG
This commit is contained in:
DQ 2022-08-25 22:22:45 +02:00 committed by GitHub
parent 9be9dc9597
commit e7b00a603e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 575 additions and 371 deletions

8
CHANGELOG.md Normal file
View file

@ -0,0 +1,8 @@
# Changelog
## `v0.12.0`
* Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens).
* Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens.
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).

View file

@ -1,52 +1,104 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use cstree::*;
use lasso::{Interner, Rodeo};
use std::{fmt, hash::Hash};
#[derive(Debug)]
pub enum Element<'s> {
Node(Vec<Element<'s>>),
Token(&'s str),
Plus,
}
#[derive(Debug, Clone, Copy)]
pub enum TestKind {
Element { n: u16 },
Plus,
}
/// Type-level boolean used as a compile-time switch.
///
/// Implementors carry their value in [`Bool::VALUE`]; `TestLang::static_text` reads it to decide
/// whether any static token text is reported.
pub trait Bool: Hash + Ord + fmt::Debug + Copy {
/// The boolean value this marker type stands for.
const VALUE: bool;
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum TestLang {}
impl Language for TestLang {
type Kind = SyntaxKind;
pub struct TestLang<T: Bool> {
_marker: std::marker::PhantomData<T>,
}
/// Marker type whose [`Bool::VALUE`] is `false`: a `TestLang<NoStaticText>` never reports
/// static text for any kind.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct NoStaticText;
impl Bool for NoStaticText {
const VALUE: bool = false;
}
/// Marker type whose [`Bool::VALUE`] is `true`: a `TestLang<UseStaticText>` reports `"+"` as
/// the static text of `TestKind::Plus`.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct UseStaticText;
impl Bool for UseStaticText {
const VALUE: bool = true;
}
impl<T: Bool> Language for TestLang<T> {
type Kind = TestKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
raw
if raw.0 == u16::MAX - 1 {
TestKind::Plus
} else {
TestKind::Element { n: raw.0 }
}
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
kind
match kind {
TestKind::Element { n } => SyntaxKind(n),
TestKind::Plus => SyntaxKind(u16::MAX - 1),
}
}
/// Static text lookup for the benchmark language.
///
/// Only `TestKind::Plus` has fixed text (`"+"`), and it is only reported when the `T`
/// parameter's `Bool::VALUE` is `true`. Every other combination yields `None`.
fn static_text(kind: Self::Kind) -> Option<&'static str> {
    match (<T as Bool>::VALUE, kind) {
        (true, TestKind::Plus) => Some("+"),
        (false, TestKind::Plus) | (_, TestKind::Element { .. }) => None,
    }
}
}
pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
pub fn build_tree_with_cache<'c, 'i, T: Bool, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
where
I: Interner,
{
let mut builder = GreenNodeBuilder::with_cache(cache);
let mut builder: GreenNodeBuilder<TestLang<T>, I> = GreenNodeBuilder::with_cache(cache);
build_recursive(root, &mut builder, 0);
let (node, cache) = builder.finish();
assert!(cache.is_none());
node
}
pub fn build_recursive<'c, 'i, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'c, 'i, I>, mut from: u16) -> u16
pub fn build_recursive<'c, 'i, T: Bool, I>(
root: &Element<'_>,
builder: &mut GreenNodeBuilder<'c, 'i, TestLang<T>, I>,
mut from: u16,
) -> u16
where
I: Interner,
{
match root {
Element::Node(children) => {
builder.start_node(SyntaxKind(from));
builder.start_node(TestKind::Element { n: from });
for child in children {
from = build_recursive(child, builder, from + 1);
}
builder.finish_node();
}
Element::Token(text) => {
builder.token(SyntaxKind(from), *text);
builder.token(TestKind::Element { n: from }, *text);
}
Element::Plus => {
builder.token(TestKind::Plus, "+");
}
}
from
@ -55,25 +107,31 @@ where
fn two_level_tree() -> Element<'static> {
use Element::*;
Node(vec![
Node(vec![Token("0.0"), Token("0.1")]),
Node(vec![Token("0.0"), Plus, Token("0.1")]),
Node(vec![Token("1.0")]),
Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]),
Node(vec![Token("2.0"), Plus, Token("2.1"), Plus, Token("2.2")]),
])
}
pub fn create(c: &mut Criterion) {
let mut group = c.benchmark_group("qualification");
let mut group = c.benchmark_group("two-level tree");
group.throughput(Throughput::Elements(1));
let mut interner = Rodeo::new();
let mut cache = NodeCache::with_interner(&mut interner);
let tree = two_level_tree();
group.bench_function("two-level tree", |b| {
group.bench_function("with static text", |b| {
b.iter(|| {
for _ in 0..100_000 {
let _tree = build_tree_with_cache(&tree, &mut cache);
}
let tree = build_tree_with_cache::<UseStaticText, _>(&tree, &mut cache);
black_box(tree);
})
});
group.bench_function("without static text", |b| {
b.iter(|| {
let tree = build_tree_with_cache::<NoStaticText, _>(&tree, &mut cache);
black_box(tree);
})
});

View file

@ -55,6 +55,16 @@ impl cstree::Language for Lang {
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
kind.into()
}
/// Returns the fixed text of the four operator kinds (`+`, `-`, `*`, `/`); every other kind
/// has no static text.
fn static_text(kind: Self::Kind) -> Option<&'static str> {
// NOTE(review): the arms below only match variants if the kind variants are in scope
// (e.g. through a glob import); otherwise `Add` would be a catch-all binding — confirm.
match kind {
Add => Some("+"),
Sub => Some("-"),
Mul => Some("*"),
Div => Some("/"),
_ => None,
}
}
}
type SyntaxNode = cstree::SyntaxNode<Lang>;
@ -65,7 +75,7 @@ type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
builder: GreenNodeBuilder<'static, 'static>,
builder: GreenNodeBuilder<'static, 'static, Lang>,
iter: Peekable<I>,
}
impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
@ -78,7 +88,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
fn bump(&mut self) {
if let Some((token, string)) = self.iter.next() {
self.builder.token(token.into(), string);
self.builder.token(token, string);
}
}
@ -86,7 +96,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
match self.peek() {
Some(Number) => self.bump(),
_ => {
self.builder.start_node(Error.into());
self.builder.start_node(Error);
self.bump();
self.builder.finish_node();
}
@ -97,7 +107,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
let checkpoint = self.builder.checkpoint();
next(self);
while self.peek().map(|t| tokens.contains(&t)).unwrap_or(false) {
self.builder.start_node_at(checkpoint, Operation.into());
self.builder.start_node_at(checkpoint, Operation);
self.bump();
next(self);
self.builder.finish_node();
@ -113,7 +123,7 @@ impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
}
fn parse(mut self) -> (SyntaxNode, impl Resolver) {
self.builder.start_node(Root.into());
self.builder.start_node(Root);
self.parse_add();
self.builder.finish_node();

View file

@ -52,6 +52,14 @@ impl cstree::Language for Lang {
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
kind.into()
}
/// Returns the fixed text of the parenthesis kinds (`(` and `)`); every other kind has no
/// static text.
fn static_text(kind: Self::Kind) -> Option<&'static str> {
// NOTE(review): the arms below only match variants if the kind variants are in scope
// (e.g. through a glob import); otherwise `LParen` would be a catch-all binding — confirm.
match kind {
LParen => Some("("),
RParen => Some(")"),
_ => None,
}
}
}
/// GreenNode is an immutable tree, which caches identical nodes and tokens, but doesn't contain
@ -60,7 +68,7 @@ impl cstree::Language for Lang {
/// the Resolver to get the real text back from the interned representation.
use cstree::{
interning::{IntoResolver, Resolver},
GreenNode,
GreenNode, Language,
};
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
@ -84,7 +92,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
/// input tokens, including whitespace.
tokens: VecDeque<(SyntaxKind, &'input str)>,
/// the in-progress green tree.
builder: GreenNodeBuilder<'static, 'static>,
builder: GreenNodeBuilder<'static, 'static, Lang>,
/// the list of syntax errors we've accumulated so far.
errors: Vec<String>,
}
@ -102,13 +110,13 @@ fn parse(text: &str) -> Parse<impl Resolver> {
impl Parser<'_> {
fn parse(mut self) -> Parse<impl Resolver> {
// Make sure that the root node covers all source
self.builder.start_node(Root.into());
self.builder.start_node(Root);
// Parse zero or more S-expressions
loop {
match self.sexp() {
SexpRes::Eof => break,
SexpRes::RParen => {
self.builder.start_node(Error.into());
self.builder.start_node(Error);
self.errors.push("unmatched `)`".to_string());
self.bump(); // be sure to advance even in case of an error, so as to not get stuck
self.builder.finish_node();
@ -135,7 +143,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
fn list(&mut self) {
assert_eq!(self.current(), Some(LParen));
// Start the list node
self.builder.start_node(List.into());
self.builder.start_node(List);
self.bump(); // '('
loop {
match self.sexp() {
@ -166,7 +174,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
match t {
LParen => self.list(),
Word => {
self.builder.start_node(Atom.into());
self.builder.start_node(Atom);
self.bump();
self.builder.finish_node();
}
@ -179,7 +187,7 @@ fn parse(text: &str) -> Parse<impl Resolver> {
/// Advance one token, adding it to the current branch of the tree builder.
fn bump(&mut self) {
let (kind, text) = self.tokens.pop_front().unwrap();
self.builder.token(kind.into(), text);
self.builder.token(kind, text);
}
/// Peek at the first unprocessed token
@ -348,7 +356,9 @@ impl ast::Atom {
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
match &self.0.green().children().next() {
Some(cstree::NodeOrToken::Token(token)) => token.text(resolver),
Some(cstree::NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind()))
.or_else(|| token.text(resolver))
.unwrap(),
_ => unreachable!(),
}
}

View file

@ -26,12 +26,12 @@ pub struct SyntaxKind(pub u16);
#[cfg(test)]
mod tests {
use super::*;
use node::GreenNodeHead;
use token::GreenTokenData;
use super::*;
#[test]
#[cfg_attr(miri, ignore)]
fn assert_send_sync() {
fn f<T: Send + Sync>() {}
f::<GreenNode>();
@ -41,6 +41,7 @@ mod tests {
}
#[test]
#[cfg_attr(miri, ignore)]
#[rustfmt::skip]
fn assert_green_sizes() {
use std::mem::size_of;

View file

@ -1,15 +1,13 @@
use std::{
convert::TryFrom,
hash::{Hash, Hasher},
};
use std::hash::{Hash, Hasher};
use fxhash::{FxHashMap, FxHasher32};
use text_size::TextSize;
use crate::{
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
interning::Interner,
NodeOrToken,
interning::{Interner, Key},
utility_types::MaybeOwned,
Language, NodeOrToken,
};
use super::{node::GreenNodeHead, token::GreenTokenData};
@ -36,20 +34,20 @@ impl NodeCache<'static> {
/// tokens. To re-use an existing interner, see [`with_interner`](NodeCache::with_interner).
/// # Examples
/// ```
/// # use cstree::*;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder, s: &str) {}
/// # use cstree::testing::{*, Language as _};
/// // Build a tree
/// let mut cache = NodeCache::new();
/// let mut builder = GreenNodeBuilder::with_cache(&mut cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::with_cache(&mut cache);
/// # builder.start_node(Root);
/// # builder.token(Int, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, _) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
///
/// // Check it out!
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// ```
pub fn new() -> Self {
Self {
@ -74,23 +72,26 @@ where
/// (strings) across tokens.
/// # Examples
/// ```
/// # use cstree::*;
/// # use cstree::testing::{*, Language as _};
/// use lasso::Rodeo;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
///
/// // Create the builder from a custom `Rodeo`
/// let mut interner = Rodeo::new();
/// let mut cache = NodeCache::with_interner(&mut interner);
/// let mut builder = GreenNodeBuilder::with_cache(&mut cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::with_cache(&mut cache);
///
/// // Construct the tree
/// # builder.start_node(Root);
/// # builder.token(Int, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, _) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
///
/// // Use the tree
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// assert_eq!(int.as_token().unwrap().text(&interner), "42");
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
/// ```
#[inline]
pub fn with_interner(interner: &'i mut I) -> Self {
@ -105,24 +106,27 @@ where
/// (strings) across tokens.
/// # Examples
/// ```
/// # use cstree::*;
/// # use cstree::testing::{*, Language as _};
/// use lasso::Rodeo;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
///
/// // Create the builder from a custom `Rodeo`
/// let mut interner = Rodeo::new();
/// let cache = NodeCache::from_interner(interner);
/// let mut builder = GreenNodeBuilder::from_cache(cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> = GreenNodeBuilder::from_cache(cache);
///
/// // Construct the tree
/// # builder.start_node(Root);
/// # builder.token(Int, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, cache) = builder.finish();
///
/// // Use the tree
/// let interner = cache.unwrap().into_interner().unwrap();
/// assert_eq!(tree.kind(), ROOT);
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// assert_eq!(int.as_token().unwrap().text(&interner), "42");
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
/// ```
#[inline]
pub fn from_interner(interner: I) -> Self {
@ -165,10 +169,12 @@ where
self.interner.into_owned()
}
fn node(&mut self, kind: SyntaxKind, children: &[GreenElement]) -> GreenNode {
fn node<L: Language>(&mut self, kind: L::Kind, all_children: &mut Vec<GreenElement>, offset: usize) -> GreenNode {
// NOTE: this fn must remove all children starting at `offset` from `all_children` before returning
let kind = L::kind_to_raw(kind);
let mut hasher = FxHasher32::default();
let mut text_len: TextSize = 0.into();
for child in children {
for child in &all_children[offset..] {
text_len += child.text_len();
child.hash(&mut hasher);
}
@ -181,20 +187,26 @@ where
// For example, all `#[inline]` in this file share the same green node!
// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
// 17% of the memory for green nodes!
let children = all_children.drain(offset..);
if children.len() <= CHILDREN_CACHE_THRESHOLD {
self.get_cached_node(kind, children, text_len, child_hash)
} else {
GreenNode::new_with_len_and_hash(kind, children.iter().cloned(), text_len, child_hash)
GreenNode::new_with_len_and_hash(kind, children, text_len, child_hash)
}
}
/// Forwards `text` to the cache's interner via `get_or_intern` and returns the resulting
/// [`Key`].
#[inline(always)]
fn intern(&mut self, text: &str) -> Key {
self.interner.get_or_intern(text)
}
/// Creates a [`GreenNode`] by looking inside the cache or inserting
/// a new node into the cache if it's a cache miss.
#[inline]
fn get_cached_node(
&mut self,
kind: SyntaxKind,
children: &[GreenElement],
children: std::vec::Drain<'_, GreenElement>,
text_len: TextSize,
child_hash: u32,
) -> GreenNode {
@ -205,13 +217,13 @@ where
};
self.nodes
.entry(head)
.or_insert_with_key(|head| GreenNode::from_head_and_children(head.clone(), children.iter().cloned()))
.or_insert_with_key(|head| GreenNode::from_head_and_children(head.clone(), children))
.clone()
}
fn token(&mut self, kind: SyntaxKind, text: &str) -> GreenToken {
let text_len = TextSize::try_from(text.len()).unwrap();
let text = self.interner.get_or_intern(text);
fn token<L: Language>(&mut self, kind: L::Kind, text: Option<Key>, len: u32) -> GreenToken {
let text_len = TextSize::from(len);
let kind = L::kind_to_raw(kind);
let data = GreenTokenData { kind, text, text_len };
self.tokens
.entry(data)
@ -220,47 +232,6 @@ where
}
}
#[derive(Debug)]
enum MaybeOwned<'a, T> {
Owned(T),
Borrowed(&'a mut T),
}
impl<T> MaybeOwned<'_, T> {
fn into_owned(self) -> Option<T> {
match self {
MaybeOwned::Owned(owned) => Some(owned),
MaybeOwned::Borrowed(_) => None,
}
}
}
impl<T> std::ops::Deref for MaybeOwned<'_, T> {
type Target = T;
fn deref(&self) -> &T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
}
}
}
impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
fn deref_mut(&mut self) -> &mut T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
}
}
}
impl<T: Default> Default for MaybeOwned<'_, T> {
fn default() -> Self {
MaybeOwned::Owned(T::default())
}
}
/// A checkpoint for maybe wrapping a node. See [`GreenNodeBuilder::checkpoint`] for details.
#[derive(Clone, Copy, Debug)]
pub struct Checkpoint(usize);
@ -274,28 +245,30 @@ pub struct Checkpoint(usize);
///
/// # Examples
/// ```
/// # use cstree::{*, interning::IntoResolver};
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// let mut builder = GreenNodeBuilder::new();
/// builder.start_node(ROOT);
/// builder.token(INT, "42");
/// # use cstree::testing::{*, Language as _};
/// # use cstree::interning::IntoResolver;
/// // Build a tree
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// builder.start_node(Root);
/// builder.token(Int, "42");
/// builder.finish_node();
/// let (tree, cache) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
///
/// // Check it out!
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42"));
/// ```
#[derive(Debug)]
pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> {
pub struct GreenNodeBuilder<'cache, 'interner, L: Language, I = TokenInterner> {
cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
parents: Vec<(SyntaxKind, usize)>,
parents: Vec<(L::Kind, usize)>,
children: Vec<GreenElement>,
}
impl GreenNodeBuilder<'static, 'static> {
impl<L: Language> GreenNodeBuilder<'static, 'static, L> {
/// Creates new builder with an empty [`NodeCache`].
pub fn new() -> Self {
Self {
@ -306,14 +279,15 @@ impl GreenNodeBuilder<'static, 'static> {
}
}
impl Default for GreenNodeBuilder<'static, 'static> {
impl<L: Language> Default for GreenNodeBuilder<'static, 'static, L> {
fn default() -> Self {
Self::new()
}
}
impl<'cache, 'interner, I> GreenNodeBuilder<'cache, 'interner, I>
impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I>
where
L: Language,
I: Interner,
{
/// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally
@ -331,22 +305,24 @@ where
/// The `cache` given will be returned on [`finish`](GreenNodeBuilder::finish).
/// # Examples
/// ```
/// # use cstree::*;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder, s: &str) {}
/// # use cstree::testing::{*, Language as _};
/// // Construct a builder from our own cache
/// let cache = NodeCache::new();
/// let mut builder = GreenNodeBuilder::from_cache(cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::from_cache(cache);
///
/// // Build a tree
/// # builder.start_node(Root);
/// # builder.token(Int, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, cache) = builder.finish();
///
/// // Use the tree
/// let interner = cache.unwrap().into_interner().unwrap();
/// assert_eq!(tree.kind(), ROOT);
/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root));
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// assert_eq!(int.as_token().unwrap().text(&interner), "42");
/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int));
/// assert_eq!(int.as_token().unwrap().text(&interner), Some("42"));
/// ```
pub fn from_cache(cache: NodeCache<'interner, I>) -> Self {
Self {
@ -390,9 +366,9 @@ where
/// This is the same interner as used by the underlying [`NodeCache`].
/// # Examples
/// ```
/// # use cstree::*;
/// # use cstree::testing::*;
/// # use cstree::interning::*;
/// let mut builder = GreenNodeBuilder::new();
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// let interner = builder.interner_mut();
/// let key = interner.get_or_intern("foo");
/// assert_eq!(interner.resolve(&key), "foo");
@ -402,16 +378,34 @@ where
&mut *self.cache.interner
}
/// Add new token to the current branch.
/// Add a new token to the current branch. If `kind` has static text (see
/// `Language::static_text`), no explicit section of text is stored for the token.
/// This is useful if the text can always be inferred from the token's `kind`, for example
/// when using kinds for specific operators or punctuation.
///
/// ## Panics
/// In debug mode, if `kind` has static text, this function will verify that `text` matches that text.
#[inline]
pub fn token(&mut self, kind: SyntaxKind, text: &str) {
let token = self.cache.token(kind, text);
/// Panics in all build modes if `kind` has no static text and `text` is longer than
/// `u32::MAX` bytes, since token lengths are stored as 32-bit values.
pub fn token(&mut self, kind: L::Kind, text: &str) {
    let token = match L::static_text(kind) {
        Some(static_text) => {
            debug_assert_eq!(
                static_text, text,
                r#"Received `{kind:?}` token which should have text "{static_text}", but "{text}" was given."#
            );
            // Nothing is interned for static tokens: only the length is recorded.
            // NOTE(review): assumes the `&'static str` returned by the language fits in `u32`.
            self.cache.token::<L>(kind, None, static_text.len() as u32)
        }
        None => {
            // A plain `as u32` cast would silently wrap for oversized input and record a
            // wrong token length; fail loudly instead. The fully qualified path keeps this
            // independent of whether `TryFrom` is in the prelude.
            let len = <u32 as std::convert::TryFrom<usize>>::try_from(text.len())
                .expect("token text must not be longer than `u32::MAX` bytes");
            let text = self.cache.intern(text);
            self.cache.token::<L>(kind, Some(text), len)
        }
    };
    self.children.push(token.into());
}
/// Start new node of the given `kind` and make it current.
#[inline]
pub fn start_node(&mut self, kind: SyntaxKind) {
pub fn start_node(&mut self, kind: L::Kind) {
let len = self.children.len();
self.parents.push((kind, len));
}
@ -420,8 +414,8 @@ where
#[inline]
pub fn finish_node(&mut self) {
let (kind, first_child) = self.parents.pop().unwrap();
let node = self.cache.node(kind, &self.children[first_child..]);
self.children.truncate(first_child);
// NOTE: we rely on the node cache to remove all children starting at `first_child` from `self.children`
let node = self.cache.node::<L>(kind, &mut self.children, first_child);
self.children.push(node.into());
}
@ -432,21 +426,20 @@ where
///
/// # Examples
/// ```
/// # use cstree::{GreenNodeBuilder, SyntaxKind};
/// # const PLUS: SyntaxKind = SyntaxKind(0);
/// # const OPERATION: SyntaxKind = SyntaxKind(1);
/// # use cstree::testing::*;
/// # use cstree::{GreenNodeBuilder, Language};
/// # struct Parser;
/// # impl Parser {
/// # fn peek(&self) -> Option<SyntaxKind> { None }
/// # fn peek(&self) -> Option<TestSyntaxKind> { None }
/// # fn parse_expr(&mut self) {}
/// # }
/// # let mut builder = GreenNodeBuilder::new();
/// # let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # let mut parser = Parser;
/// let checkpoint = builder.checkpoint();
/// parser.parse_expr();
/// if parser.peek() == Some(PLUS) {
/// if let Some(Plus) = parser.peek() {
/// // 1 + 2 = Add(1, 2)
/// builder.start_node_at(checkpoint, OPERATION);
/// builder.start_node_at(checkpoint, Operation);
/// parser.parse_expr();
/// builder.finish_node();
/// }
@ -459,7 +452,7 @@ where
/// Wrap the previous branch marked by [`checkpoint`](GreenNodeBuilder::checkpoint) in a new
/// branch and make it current.
#[inline]
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: L::Kind) {
let Checkpoint(checkpoint) = checkpoint;
assert!(
checkpoint <= self.children.len(),

View file

@ -1,4 +1,4 @@
use std::{fmt, hash, mem::ManuallyDrop, ptr};
use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull};
use crate::{
green::SyntaxKind,
@ -12,13 +12,13 @@ use triomphe::Arc;
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub(super) struct GreenTokenData {
pub(super) kind: SyntaxKind,
pub(super) text: Key,
pub(super) text: Option<Key>,
pub(super) text_len: TextSize,
}
/// Leaf node in the immutable "green" tree.
pub struct GreenToken {
ptr: ptr::NonNull<GreenTokenData>,
ptr: NonNull<GreenTokenData>,
}
unsafe impl Send for GreenToken {} // where GreenTokenData: Send + Sync
@ -26,17 +26,17 @@ unsafe impl Sync for GreenToken {} // where GreenTokenData: Send + Sync
pub(super) const IS_TOKEN_TAG: usize = 0x1;
impl GreenToken {
fn add_tag(ptr: ptr::NonNull<GreenTokenData>) -> ptr::NonNull<GreenTokenData> {
fn add_tag(ptr: NonNull<GreenTokenData>) -> NonNull<GreenTokenData> {
unsafe {
let ptr = ptr.as_ptr().map_addr(|addr| addr | IS_TOKEN_TAG);
ptr::NonNull::new_unchecked(ptr)
NonNull::new_unchecked(ptr)
}
}
fn remove_tag(ptr: ptr::NonNull<GreenTokenData>) -> ptr::NonNull<GreenTokenData> {
fn remove_tag(ptr: NonNull<GreenTokenData>) -> NonNull<GreenTokenData> {
unsafe {
let ptr = ptr.as_ptr().map_addr(|addr| addr & !IS_TOKEN_TAG);
ptr::NonNull::new_unchecked(ptr)
NonNull::new_unchecked(ptr)
}
}
@ -48,7 +48,7 @@ impl GreenToken {
#[inline]
pub(super) fn new(data: GreenTokenData) -> GreenToken {
let ptr = Arc::into_raw(Arc::new(data));
let ptr = ptr::NonNull::new(ptr as *mut _).unwrap();
let ptr = NonNull::new(ptr as *mut _).unwrap();
GreenToken {
ptr: Self::add_tag(ptr),
}
@ -62,11 +62,11 @@ impl GreenToken {
/// The original source text of this Token.
#[inline]
pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str
pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str>
where
I: Resolver + ?Sized,
{
resolver.resolve(&self.data().text)
self.data().text.map(|key| resolver.resolve(&key))
}
/// Returns the length of text covered by this token.
@ -80,7 +80,7 @@ impl GreenToken {
///
/// See also [`text`](GreenToken::text).
#[inline]
pub fn text_key(&self) -> Key {
pub fn text_key(&self) -> Option<Key> {
self.data().text
}
}
@ -102,7 +102,7 @@ impl Clone for GreenToken {
let arc = ManuallyDrop::new(Arc::from_raw(ptr.as_ptr()));
Arc::into_raw(Arc::clone(&arc))
};
let ptr = unsafe { ptr::NonNull::new_unchecked(ptr as *mut _) };
let ptr = unsafe { NonNull::new_unchecked(ptr as *mut _) };
GreenToken {
ptr: Self::add_tag(ptr),
}

View file

@ -71,18 +71,21 @@ pub use crate::{
};
pub use triomphe::Arc;
/// The `Language` trait is the bridge between the internal `cstree` representation and your language
/// types.
/// This is essential to providing a [`SyntaxNode`] API that can be used with your types, as in the
/// The `Language` trait is the bridge between the internal `cstree` representation and your
/// language's types.
/// This is essential for providing a [`SyntaxNode`] API that can be used with your types, as in the
/// `s_expressions` example:
///
/// ```
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[allow(non_camel_case_types)]
/// #[repr(u16)]
/// enum SyntaxKind {
/// ROOT, // top-level node
/// ATOM, // `+`, `15`
/// WHITESPACE, // whitespaces is explicit
/// Plus, // `+`
/// Minus, // `-`
/// Integer, // like `15`
/// Expression, // combined expression, like `5 + 4 - 3`
/// Whitespace, // whitespace is explicit
/// #[doc(hidden)]
/// __LAST,
/// }
@ -102,16 +105,75 @@ pub use triomphe::Arc;
/// fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// cstree::SyntaxKind(kind as u16)
/// }
///
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
/// match kind {
/// Plus => Some("+"),
/// Minus => Some("-"),
/// _ => None,
/// }
/// }
/// }
/// ```
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
/// A type that represents what items in your Language can be.
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ...
type Kind: fmt::Debug;
type Kind: Sized + Clone + Copy + fmt::Debug;
/// Construct a semantic item kind from the compact representation.
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
/// Convert a semantic item kind into a more compact representation.
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
/// Fixed text for a particular syntax kind.
///
/// Implement for kinds that will only ever represent the same text, such as punctuation (like a
/// semicolon), keywords (like `fn`), or operators (like `<=`).
fn static_text(kind: Self::Kind) -> Option<&'static str>;
}
#[doc(hidden)]
#[allow(unsafe_code, unused)]
pub mod testing {
pub use crate::*;
pub fn parse<L: Language, I>(_b: &mut super::GreenNodeBuilder<L, I>, _s: &str) {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[allow(non_camel_case_types)]
pub enum TestSyntaxKind {
Plus,
Identifier,
Int,
Float,
Operation,
Root,
Whitespace,
__LAST,
}
pub use TestSyntaxKind::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TestLang {}
pub type MyLanguage = TestLang;
impl Language for TestLang {
type Kind = TestSyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
SyntaxKind(kind as u16)
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {
match kind {
TestSyntaxKind::Plus => Some("+"),
_ => None,
}
}
}
}

View file

@ -177,16 +177,16 @@ where
where
A: SeqAccess<'de>,
{
let mut builder = GreenNodeBuilder::new();
let mut builder: GreenNodeBuilder<L> = GreenNodeBuilder::new();
let mut data_indices = VecDeque::new();
while let Some(next) = seq.next_element::<Event<'_>>()? {
match next {
Event::EnterNode(kind, has_data) => {
builder.start_node(kind);
builder.start_node(L::kind_from_raw(kind));
data_indices.push_back(has_data);
}
Event::Token(kind, text) => builder.token(kind, text),
Event::Token(kind, text) => builder.token(L::kind_from_raw(kind), text),
Event::LeaveNode => builder.finish_node(),
}
}

View file

@ -33,3 +33,34 @@ pub use text::SyntaxText;
// this.
//
// - DQ 01/2021
#[cfg(test)]
mod tests {
    use crate::testing::*;

    /// Compile-time check that every syntax tree handle type is both `Send` and `Sync`.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn assert_send_sync() {
        fn assert_impl<T: Send + Sync>() {}

        // Unresolved handles.
        assert_impl::<SyntaxNode<TestLang>>();
        assert_impl::<SyntaxToken<TestLang>>();
        assert_impl::<SyntaxElement<TestLang>>();
        assert_impl::<SyntaxElementRef<'static, TestLang>>();

        // Resolved handles.
        assert_impl::<ResolvedNode<TestLang>>();
        assert_impl::<ResolvedToken<TestLang>>();
        assert_impl::<ResolvedElement<TestLang>>();
        assert_impl::<ResolvedElementRef<'static, TestLang>>();
    }

    /// Pins the in-memory sizes of the syntax handles relative to the size of a pointer.
    #[test]
    #[cfg_attr(miri, ignore)]
    #[rustfmt::skip]
    fn assert_syntax_sizes() {
        use std::mem::size_of;

        let ptr_size = size_of::<*const u8>();

        assert_eq!(size_of::<SyntaxNode<TestLang>>(), ptr_size);
        assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), ptr_size); // verify niche opt of `NonNull`
        assert_eq!(size_of::<SyntaxToken<TestLang>>(), ptr_size + size_of::<u32>() * 2);
    }
}

View file

@ -11,7 +11,8 @@ use std::{
cell::UnsafeCell,
fmt,
hash::{Hash, Hasher},
iter, ptr,
iter,
ptr::{self, NonNull},
sync::{
atomic::{AtomicU32, Ordering},
Arc as StdArc,
@ -26,7 +27,7 @@ use triomphe::Arc;
#[derive(Debug)]
#[repr(transparent)]
pub struct SyntaxNode<L: Language, D: 'static = ()> {
data: *mut NodeData<L, D>,
data: NonNull<NodeData<L, D>>,
}
unsafe impl<L: Language, D: 'static> Send for SyntaxNode<L, D> {}
@ -158,7 +159,7 @@ impl<L: Language, D> Drop for SyntaxNode<L, D> {
root.drop_recursive();
let root_data = root.data;
drop(root);
unsafe { drop(Box::from_raw(root_data)) };
unsafe { drop(Box::from_raw(root_data.as_ptr())) };
unsafe { drop(Box::from_raw(ref_count)) };
}
}
@ -167,7 +168,7 @@ impl<L: Language, D> Drop for SyntaxNode<L, D> {
impl<L: Language, D> SyntaxNode<L, D> {
#[inline]
fn data(&self) -> &NodeData<L, D> {
unsafe { &*self.data }
unsafe { self.data.as_ref() }
}
#[inline]
@ -209,7 +210,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
// safety: since there are no more `parent` pointers from the children of the
// node this data belonged to, and we have just dropped the node, there are now
// no more references to `data`
let data = unsafe { Box::from_raw(data) };
let data = unsafe { Box::from_raw(data.as_ptr()) };
drop(data);
}
}
@ -227,7 +228,7 @@ impl<L: Language, D> Eq for SyntaxNode<L, D> {}
impl<L: Language, D> Hash for SyntaxNode<L, D> {
fn hash<H: Hasher>(&self, state: &mut H) {
ptr::hash(self.data, state);
self.data.hash(state);
}
}
@ -251,7 +252,7 @@ impl<L: Language, D> Kind<L, D> {
pub(super) struct NodeData<L: Language, D: 'static> {
kind: Kind<L, D>,
green: ptr::NonNull<GreenNode>,
green: NonNull<GreenNode>,
ref_count: *mut AtomicU32,
data: RwLock<Option<Arc<D>>>,
children: Vec<UnsafeCell<Option<SyntaxElement<L, D>>>>,
@ -259,24 +260,21 @@ pub(super) struct NodeData<L: Language, D: 'static> {
}
impl<L: Language, D> NodeData<L, D> {
fn new(
kind: Kind<L, D>,
green: ptr::NonNull<GreenNode>,
ref_count: *mut AtomicU32,
n_children: usize,
) -> *mut Self {
fn new(kind: Kind<L, D>, green: NonNull<GreenNode>, ref_count: *mut AtomicU32, n_children: usize) -> NonNull<Self> {
let mut children = Vec::with_capacity(n_children);
let mut child_locks = Vec::with_capacity(n_children);
children.extend((0..n_children).map(|_| Default::default()));
child_locks.extend((0..n_children).map(|_| Default::default()));
Box::into_raw(Box::new(Self {
let ptr = Box::into_raw(Box::new(Self {
kind,
green,
ref_count,
data: RwLock::default(),
children,
child_locks,
}))
}));
// safety: guaranteed by `Box::into_raw`
unsafe { NonNull::new_unchecked(ptr) }
}
}
@ -285,41 +283,20 @@ impl<L: Language, D> SyntaxNode<L, D> {
///
/// # Example
/// ```
/// # use cstree::*;
/// # #[allow(non_camel_case_types)]
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// #[repr(u16)]
/// enum SyntaxKind {
/// ROOT,
/// }
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// enum Lang {}
/// impl cstree::Language for Lang {
/// // ...
/// # type Kind = SyntaxKind;
/// #
/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// # assert!(raw.0 <= SyntaxKind::ROOT as u16);
/// # unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// # }
/// #
/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// # cstree::SyntaxKind(kind as u16)
/// # }
/// }
/// # let mut builder = GreenNodeBuilder::new();
/// # builder.start_node(SyntaxKind(0));
/// # use cstree::testing::*;
/// # let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # builder.start_node(Root);
/// # builder.finish_node();
/// # let (green, _) = builder.finish();
/// let root: SyntaxNode<Lang> = SyntaxNode::new_root(green);
/// assert_eq!(root.kind(), SyntaxKind::ROOT);
/// # let (green_root, _) = builder.finish();
/// let root: SyntaxNode<MyLanguage> = SyntaxNode::new_root(green_root);
/// assert_eq!(root.kind(), Root);
/// ```
#[inline]
pub fn new_root(green: GreenNode) -> Self {
Self::make_new_root(green, None)
}
pub(super) fn new(data: *mut NodeData<L, D>) -> Self {
fn new(data: NonNull<NodeData<L, D>>) -> Self {
Self { data }
}
@ -328,12 +305,12 @@ impl<L: Language, D> SyntaxNode<L, D> {
let n_children = green.children().count();
let data = NodeData::new(
Kind::Root(green, resolver),
ptr::NonNull::dangling(),
NonNull::dangling(),
Box::into_raw(ref_count),
n_children,
);
let ret = Self::new(data);
let green: ptr::NonNull<GreenNode> = match &ret.data().kind {
let green: NonNull<GreenNode> = match &ret.data().kind {
Kind::Root(green, _resolver) => green.into(),
_ => unreachable!(),
};
@ -341,7 +318,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
// Also, we use `addr_of_mut` here in order to not have to go through a `&mut *ret.data`,
// which would invalidate the reading provenance of `green`, since `green` is contained in
// the data once we have written it here.
unsafe { ptr::addr_of_mut!((*ret.data).green).write(green) };
unsafe { ptr::addr_of_mut!((*ret.data.as_ptr()).green).write(green) };
ret
}
@ -350,39 +327,18 @@ impl<L: Language, D> SyntaxNode<L, D> {
///
/// # Example
/// ```
/// # use cstree::*;
/// # #[allow(non_camel_case_types)]
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// #[repr(u16)]
/// enum SyntaxKind {
/// TOKEN,
/// ROOT,
/// }
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// enum Lang {}
/// impl cstree::Language for Lang {
/// // ...
/// # type Kind = SyntaxKind;
/// #
/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// # assert!(raw.0 <= SyntaxKind::ROOT as u16);
/// # unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// # }
/// #
/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// # cstree::SyntaxKind(kind as u16)
/// # }
/// }
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
/// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1);
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, ()>;
/// let mut builder = GreenNodeBuilder::new();
/// builder.start_node(ROOT);
/// builder.token(TOKEN, "content");
/// # use cstree::testing::*;
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// builder.start_node(Root);
/// builder.token(Identifier, "content");
/// builder.finish_node();
/// let (green, cache) = builder.finish();
/// let root: ResolvedNode<Lang> =
/// SyntaxNode::new_root_with_resolver(green, cache.unwrap().into_interner().unwrap());
///
/// // We are safe to use `unwrap` here because we created the builder with `new`.
/// // This created a new interner and cache for us owned by the builder,
/// // and `finish` always returns these.
/// let interner = cache.unwrap().into_interner().unwrap();
/// let root: ResolvedNode<MyLanguage> = SyntaxNode::new_root_with_resolver(green, interner);
/// assert_eq!(root.text(), "content");
/// ```
#[inline]
@ -485,7 +441,7 @@ impl<L: Language, D> SyntaxNode<L, D> {
ref_count.fetch_add(2, Ordering::AcqRel);
let node_data = node.data;
drop(node);
unsafe { drop(Box::from_raw(node_data)) };
unsafe { drop(Box::from_raw(node_data.as_ptr())) };
}
SyntaxElement::Token(token) => {
// We don't have to worry about `NodeData` or `SyntaxToken<L>`'s own `Drop` here,

View file

@ -198,7 +198,10 @@ impl<L: Language, D> ResolvedToken<L, D> {
/// Uses the resolver associated with this tree to return the source text of this token.
#[inline]
pub fn text(&self) -> &str {
self.green().text(&**self.resolver())
// one of the two must be present upon construction
self.static_text()
.or_else(|| self.green().text(&**self.resolver()))
.unwrap()
}
}
@ -725,31 +728,3 @@ impl<'a, L: Language, D> ResolvedElementRef<'a, L, D> {
}
}
}
// Compile-time check that the resolved tree types are `Send + Sync`: every `f::<T>()` call at the
// end of this test only type-checks if `T` satisfies both bounds.
#[test]
fn assert_send_sync() {
use crate::SyntaxKind;
// Uninhabited marker type that only serves as the `Language` parameter below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
enum L {}
#[derive(Debug)]
enum Kind {
Var,
}
// Minimal `Language` implementation: every raw kind maps to `Kind::Var`,
// and every kind maps back to raw kind 0.
impl Language for L {
type Kind = Kind;
fn kind_from_raw(_: SyntaxKind) -> Self::Kind {
Kind::Var
}
fn kind_to_raw(_: Self::Kind) -> SyntaxKind {
SyntaxKind(0)
}
}
// Helper whose only purpose is to carry the `Send + Sync` bound.
fn f<T: Send + Sync>() {}
f::<ResolvedNode<L>>();
f::<ResolvedToken<L>>();
f::<ResolvedElement<L>>();
f::<ResolvedElementRef<'static, L>>();
}

View file

@ -13,43 +13,21 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
///
/// # Example
/// ```
/// # use cstree::{*, interning::IntoResolver};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[repr(u16)]
/// # enum SyntaxKind {
/// # TOKEN,
/// # ROOT,
/// # }
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # enum Lang {}
/// # impl cstree::Language for Lang {
/// # type Kind = SyntaxKind;
/// # use cstree::testing::*;
/// # use cstree::interning::IntoResolver;
/// #
/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// # assert!(raw.0 <= SyntaxKind::ROOT as u16);
/// # unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// # }
/// #
/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// # cstree::SyntaxKind(kind as u16)
/// # }
/// # }
/// # type SyntaxNode = cstree::SyntaxNode<Lang, ()>;
/// # type ResolvedNode = cstree::ResolvedNode<Lang, ()>;
/// #
/// # fn parse_float_literal(s: &str) -> ResolvedNode {
/// # const LITERAL: cstree::SyntaxKind = cstree::SyntaxKind(0);
/// # let mut builder = GreenNodeBuilder::new();
/// # builder.start_node(LITERAL);
/// # builder.token(LITERAL, s);
/// fn parse_float_literal(s: &str) -> ResolvedNode<MyLanguage> {
/// // parsing...
/// # let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # builder.start_node(Float);
/// # builder.token(Float, s);
/// # builder.finish_node();
/// # let (root, cache) = builder.finish();
/// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver();
/// # SyntaxNode::new_root_with_resolver(root, resolver)
/// # }
/// let node = parse_float_literal("2.748E2");
/// let text = node.text();
/// }
/// let float_node = parse_float_literal("2.748E2");
/// let text = float_node.text();
/// assert_eq!(text.len(), 7.into());
/// assert!(text.contains_char('E'));
/// assert_eq!(text.find_char('E'), Some(5.into()));
@ -412,13 +390,28 @@ mod tests {
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
kind
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {
if kind == SyntaxKind(1) {
Some("{")
} else if kind == SyntaxKind(2) {
Some("}")
} else {
None
}
}
}
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver) {
let mut builder = GreenNodeBuilder::new();
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
builder.start_node(SyntaxKind(62));
for &chunk in chunks.iter() {
builder.token(SyntaxKind(92), chunk);
let kind = match chunk {
"{" => 1,
"}" => 2,
_ => 3,
};
builder.token(SyntaxKind(kind), chunk);
}
builder.finish_node();
let (node, cache) = builder.finish();

View file

@ -170,66 +170,115 @@ impl<L: Language, D> SyntaxToken<L, D> {
}
/// Uses the provided resolver to return the source text of this token.
///
/// If no text is explicitly associated with the token, returns its [`static_text`](SyntaxToken::static_text)
/// instead.
#[inline]
pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str
where
I: Resolver + ?Sized,
{
self.green().text(resolver)
// one of the two must be present upon construction
self.static_text().or_else(|| self.green().text(resolver)).unwrap()
}
/// If the [syntax kind](Language::Kind) of this token always represents the same text, returns
/// that text.
///
/// Returns `None` when [`Language::static_text`] has no text for this token's kind.
///
/// # Examples
/// If there is a syntax kind `Plus` that represents just the `+` operator and we implement
/// [`Language::static_text`] for it, we can retrieve this text in the resulting syntax tree.
///
/// ```
/// # use cstree::testing::*;
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # builder.start_node(Root);
/// # builder.token(Identifier, "x");
/// # builder.token(Whitespace, " ");
/// # builder.token(Plus, "+");
/// # builder.token(Whitespace, " ");
/// # builder.token(Int, "3");
/// # builder.finish_node();
/// let tree = parse(&mut builder, "x + 3");
/// # let tree: SyntaxNode<MyLanguage> = SyntaxNode::new_root(builder.finish().0);
/// let plus = tree
/// .children_with_tokens()
/// .nth(2) // `x`, then a space, then `+`
/// .unwrap()
/// .into_token()
/// .unwrap();
/// assert_eq!(plus.static_text(), Some("+"));
/// ```
#[inline(always)]
pub fn static_text(&self) -> Option<&'static str> {
// Pure delegation: the answer depends only on this token's kind.
L::static_text(self.kind())
}
/// Returns `true` if `self` and `other` represent equal source text.
///
/// This method is different from the `PartialEq` and `Eq` implementations in that it compares
/// the text and not the token position.
/// only the token text and not its source position.
/// It is more efficient than comparing the result of
/// [`resolve_text`](SyntaxToken::resolve_text) because it compares the tokens' interned
/// [`text_key`s](SyntaxToken::text_key).
/// [`text_key`s](SyntaxToken::text_key) (if their text is not static) or their kind (if it is).
/// Therefore, it also does not require a [`Resolver`].
///
/// **Note** that the result of the comparison may be wrong when comparing two tokens from
/// different trees that use different interners.
///
/// # Examples
/// ```
/// # use cstree::testing::*;
/// let mut builder: GreenNodeBuilder<MyLanguage> = GreenNodeBuilder::new();
/// # builder.start_node(Root);
/// # builder.token(Identifier, "x");
/// # builder.token(Whitespace, " ");
/// # builder.token(Plus, "+");
/// # builder.token(Whitespace, " ");
/// # builder.token(Identifier, "x");
/// # builder.token(Whitespace, " ");
/// # builder.token(Plus, "+");
/// # builder.token(Int, "3");
/// # builder.finish_node();
/// let tree = parse(&mut builder, "x + x + 3");
/// # let tree: SyntaxNode<MyLanguage> = SyntaxNode::new_root(builder.finish().0);
/// let mut tokens = tree.children_with_tokens();
/// let tokens = tokens.by_ref();
/// let first_x = tokens.next().unwrap().into_token().unwrap();
///
/// // For the other tokens, skip over the whitespace between them
/// let first_plus = tokens.skip(1).next().unwrap().into_token().unwrap();
/// let second_x = tokens.skip(1).next().unwrap().into_token().unwrap();
/// let second_plus = tokens.skip(1).next().unwrap().into_token().unwrap();
/// assert!(first_x.text_eq(&second_x));
/// assert!(first_plus.text_eq(&second_plus));
/// ```
#[inline]
pub fn text_eq(&self, other: &Self) -> bool {
self.text_key() == other.text_key()
if let Some(k1) = self.green().text_key() {
match other.green().text_key() {
Some(k2) => return k1 == k2,
None => return false, // a kind with static text cannot be equal to one with non-static text
}
}
/// Returns the interned key of text covered by this token.
debug_assert!(self.static_text().is_some());
debug_assert!(other.static_text().is_some());
self.syntax_kind() == other.syntax_kind()
}
/// Returns the interned key of text covered by this token, if any.
/// This key may be used for comparisons with other keys of strings interned by the same interner.
///
/// See also [`resolve_text`](SyntaxToken::resolve_text) and [`text_eq`](SyntaxToken::text_eq).
///
/// # Examples
/// If you intern strings inside of your application, e.g. inside of a compiler, you can use
/// If you intern strings inside of your application, like inside a compiler, you can use
/// token's text keys to cross-reference between the syntax tree and the rest of your
/// implementation by re-using the interner in both.
/// ```
/// # use cstree::*;
/// # use cstree::interning::{Hasher, Rodeo, Key, new_interner};
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[repr(u16)]
/// # enum SyntaxKind {
/// # ROOT,
/// # INT,
/// # }
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # enum Lang {}
/// # impl cstree::Language for Lang {
/// # type Kind = SyntaxKind;
/// #
/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// # assert!(raw.0 <= SyntaxKind::INT as u16);
/// # unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// # }
/// #
/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// # cstree::SyntaxKind(kind as u16)
/// # }
/// # }
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, ()>;
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
/// # const IDENT: cstree::SyntaxKind = cstree::SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
/// #
/// # use cstree::testing::*;
/// use cstree::interning::{new_interner, Hasher, Key, Rodeo};
/// struct TypeTable {
/// // ...
/// }
@ -243,15 +292,19 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// # interner: Rodeo,
/// # type_table: TypeTable,
/// # }
/// # let interner = new_interner();
/// # let state = &mut State { interner, type_table: TypeTable{} };
/// let mut builder = GreenNodeBuilder::with_interner(&mut state.interner);
/// let interner = new_interner();
/// let mut state = State {
/// interner,
/// type_table: TypeTable{ /* stuff */},
/// };
/// let mut builder: GreenNodeBuilder<MyLanguage, Rodeo> =
/// GreenNodeBuilder::with_interner(&mut state.interner);
/// # let input = "";
/// # builder.start_node(ROOT);
/// # builder.token(IDENT, "x");
/// # builder.start_node(Root);
/// # builder.token(Identifier, "x");
/// # builder.finish_node();
/// let tree = parse(&mut builder, "x");
/// # let tree = SyntaxNode::<Lang>::new_root(builder.finish().0);
/// # let tree: SyntaxNode<MyLanguage> = SyntaxNode::new_root(builder.finish().0);
/// let type_table = &state.type_table;
/// let ident = tree
/// .children_with_tokens()
@ -259,10 +312,10 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// .unwrap()
/// .into_token()
/// .unwrap();
/// let typ = type_table.type_of(ident.text_key());
/// let typ = type_table.type_of(ident.text_key().unwrap());
/// ```
#[inline]
pub fn text_key(&self) -> Key {
pub fn text_key(&self) -> Option<Key> {
self.green().text_key()
}

View file

@ -88,6 +88,47 @@ impl<T> WalkEvent<T> {
}
}
/// A value that is either held directly or reached through an exclusive borrow.
///
/// Both variants dereference (shared and mutable) to the underlying `T`, so code using a
/// `MaybeOwned` does not need to know which of the two it was given.
#[derive(Debug)]
pub(crate) enum MaybeOwned<'a, T> {
    /// The value is stored inline.
    Owned(T),
    /// The value lives elsewhere and is only borrowed mutably.
    Borrowed(&'a mut T),
}

impl<T> MaybeOwned<'_, T> {
    /// Consumes `self`, yielding the inner value if it was [`Owned`](MaybeOwned::Owned)
    /// and `None` if it was only [`Borrowed`](MaybeOwned::Borrowed).
    pub(crate) fn into_owned(self) -> Option<T> {
        if let MaybeOwned::Owned(value) = self {
            Some(value)
        } else {
            None
        }
    }
}

impl<T> std::ops::Deref for MaybeOwned<'_, T> {
    type Target = T;

    fn deref(&self) -> &T {
        match *self {
            Self::Owned(ref value) => value,
            // reborrow through the stored `&mut T`
            Self::Borrowed(ref borrowed) => &**borrowed,
        }
    }
}

impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
    fn deref_mut(&mut self) -> &mut T {
        match *self {
            Self::Owned(ref mut value) => value,
            // reborrow through the stored `&mut T`
            Self::Borrowed(ref mut borrowed) => &mut **borrowed,
        }
    }
}

impl<T: Default> Default for MaybeOwned<'_, T> {
    /// The default is an owned `T::default()`.
    fn default() -> Self {
        Self::Owned(T::default())
    }
}
/// There might be zero, one or two leaves at a given offset.
#[derive(Clone, Debug)]
pub enum TokenAtOffset<T> {

View file

@ -3,7 +3,7 @@ use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind, TextRange};
use lasso::{Resolver, Rodeo};
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<D>, impl Resolver) {
let mut builder = GreenNodeBuilder::new();
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
build_recursive(root, &mut builder, 0);
let (node, cache) = builder.finish();
(SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap())
@ -178,5 +178,5 @@ fn assert_debug_display() {
f::<cstree::NodeOrToken<String, u128>>();
fn dbg<T: fmt::Debug>() {}
dbg::<GreenNodeBuilder<'static, 'static>>();
dbg::<GreenNodeBuilder<'static, 'static, TestLang>>();
}

View file

@ -35,21 +35,30 @@ impl Language for TestLang {
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
kind
}
// This test language declares no static text: every kind yields `None`.
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
None
}
}
pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode
where
I: Interner,
{
let mut builder = GreenNodeBuilder::with_cache(cache);
let mut builder: GreenNodeBuilder<TestLang, I> = GreenNodeBuilder::with_cache(cache);
build_recursive(root, &mut builder, 0);
let (node, cache) = builder.finish();
assert!(cache.is_none());
node
}
pub fn build_recursive<'c, 'i, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'c, 'i, I>, mut from: u16) -> u16
pub fn build_recursive<'c, 'i, L, I>(
root: &Element<'_>,
builder: &mut GreenNodeBuilder<'c, 'i, L, I>,
mut from: u16,
) -> u16
where
L: Language<Kind = SyntaxKind>,
I: Interner,
{
match root {

View file

@ -24,9 +24,13 @@ fn empty_tree_arc() {
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
cstree::SyntaxKind(kind as u16)
}
// This test-local language declares no static text: every kind yields `None`.
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
None
}
let mut builder = GreenNodeBuilder::new();
builder.start_node(SyntaxKind(0));
}
let mut builder: GreenNodeBuilder<Lang> = GreenNodeBuilder::new();
builder.start_node(SyntaxKind::Root);
builder.finish_node();
let (green, _) = builder.finish();
let root: SyntaxNode<Lang> = SyntaxNode::new_root(green);

View file

@ -3,11 +3,11 @@
use crossbeam_utils::thread::scope;
use std::{thread, time::Duration};
use super::{build_recursive, Element, ResolvedNode, SyntaxNode};
use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang};
use cstree::{interning::IntoResolver, GreenNodeBuilder};
fn build_tree<D>(root: &Element<'_>) -> ResolvedNode<D> {
let mut builder = GreenNodeBuilder::new();
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
build_recursive(root, &mut builder, 0);
let (node, cache) = builder.finish();
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())

View file

@ -1,6 +1,6 @@
use crate::{build_recursive, build_tree_with_cache, ResolvedNode};
use super::{Element, SyntaxNode};
use super::{Element, SyntaxNode, TestLang};
use cstree::{
interning::{new_interner, IntoResolver},
GreenNodeBuilder, NodeCache, NodeOrToken,
@ -224,7 +224,7 @@ fn three_level_tree() -> Element<'static> {
}
fn build_tree(root: Element<'_>) -> ResolvedNode<String> {
let mut builder = GreenNodeBuilder::new();
let mut builder: GreenNodeBuilder<TestLang> = GreenNodeBuilder::new();
build_recursive(&root, &mut builder, 0);
let (node, cache) = builder.finish();
SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())