From e7b00a603e8782eb45976b7ccb4883eac25aac57 Mon Sep 17 00:00:00 2001 From: DQ Date: Thu, 25 Aug 2022 22:22:45 +0200 Subject: [PATCH] Performance Improvements (#43) - add `Language::static_text` and optimize static tokens - re-use existing `ThinArc`s in `GreenNodeBuilder::finish_node` - replace `*mut` in `SyntaxNode` with `NonNull` - add CHANGELOG --- CHANGELOG.md | 8 ++ benches/main.rs | 94 ++++++++++++--- examples/math.rs | 20 +++- examples/s_expressions.rs | 26 ++-- src/green.rs | 5 +- src/green/builder.rs | 241 ++++++++++++++++++-------------------- src/green/token.rs | 24 ++-- src/lib.rs | 76 ++++++++++-- src/serde_impls.rs | 6 +- src/syntax/mod.rs | 31 +++++ src/syntax/node.rs | 112 ++++++------------ src/syntax/resolved.rs | 33 +----- src/syntax/text.rs | 61 +++++----- src/syntax/token.rs | 135 ++++++++++++++------- src/utility_types.rs | 41 +++++++ tests/it/basic.rs | 4 +- tests/it/main.rs | 13 +- tests/it/regressions.rs | 8 +- tests/it/sendsync.rs | 4 +- tests/it/serde.rs | 4 +- 20 files changed, 575 insertions(+), 371 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c798315 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## `v0.12.0` + + * Introduced `Language::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens). + * Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens. + * Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements. + * Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
\ No newline at end of file diff --git a/benches/main.rs b/benches/main.rs index 95b7796..2ce7cf3 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -1,52 +1,104 @@ -use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; use cstree::*; use lasso::{Interner, Rodeo}; +use std::{fmt, hash::Hash}; #[derive(Debug)] pub enum Element<'s> { Node(Vec>), Token(&'s str), + Plus, +} + +#[derive(Debug, Clone, Copy)] +pub enum TestKind { + Element { n: u16 }, + Plus, +} + +pub trait Bool: Hash + Ord + fmt::Debug + Copy { + const VALUE: bool; } #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum TestLang {} -impl Language for TestLang { - type Kind = SyntaxKind; +pub struct TestLang { + _marker: std::marker::PhantomData, +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct NoStaticText; +impl Bool for NoStaticText { + const VALUE: bool = false; +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct UseStaticText; +impl Bool for UseStaticText { + const VALUE: bool = true; +} + +impl Language for TestLang { + type Kind = TestKind; fn kind_from_raw(raw: SyntaxKind) -> Self::Kind { - raw + if raw.0 == u16::MAX - 1 { + TestKind::Plus + } else { + TestKind::Element { n: raw.0 } + } } fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { - kind + match kind { + TestKind::Element { n } => SyntaxKind(n), + TestKind::Plus => SyntaxKind(u16::MAX - 1), + } + } + + fn static_text(kind: Self::Kind) -> Option<&'static str> { + if !::VALUE { + return None; + } + + match kind { + TestKind::Plus => Some("+"), + TestKind::Element { .. 
} => None, + } } } -pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode +pub fn build_tree_with_cache<'c, 'i, T: Bool, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode where I: Interner, { - let mut builder = GreenNodeBuilder::with_cache(cache); + let mut builder: GreenNodeBuilder, I> = GreenNodeBuilder::with_cache(cache); build_recursive(root, &mut builder, 0); let (node, cache) = builder.finish(); assert!(cache.is_none()); node } -pub fn build_recursive<'c, 'i, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'c, 'i, I>, mut from: u16) -> u16 +pub fn build_recursive<'c, 'i, T: Bool, I>( + root: &Element<'_>, + builder: &mut GreenNodeBuilder<'c, 'i, TestLang, I>, + mut from: u16, +) -> u16 where I: Interner, { match root { Element::Node(children) => { - builder.start_node(SyntaxKind(from)); + builder.start_node(TestKind::Element { n: from }); for child in children { from = build_recursive(child, builder, from + 1); } builder.finish_node(); } Element::Token(text) => { - builder.token(SyntaxKind(from), *text); + builder.token(TestKind::Element { n: from }, *text); + } + Element::Plus => { + builder.token(TestKind::Plus, "+"); } } from @@ -55,25 +107,31 @@ where fn two_level_tree() -> Element<'static> { use Element::*; Node(vec![ - Node(vec![Token("0.0"), Token("0.1")]), + Node(vec![Token("0.0"), Plus, Token("0.1")]), Node(vec![Token("1.0")]), - Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]), + Node(vec![Token("2.0"), Plus, Token("2.1"), Plus, Token("2.2")]), ]) } pub fn create(c: &mut Criterion) { - let mut group = c.benchmark_group("qualification"); + let mut group = c.benchmark_group("two-level tree"); group.throughput(Throughput::Elements(1)); let mut interner = Rodeo::new(); let mut cache = NodeCache::with_interner(&mut interner); let tree = two_level_tree(); - group.bench_function("two-level tree", |b| { + group.bench_function("with static text", |b| { b.iter(|| { - for _ in 
0..100_000 { - let _tree = build_tree_with_cache(&tree, &mut cache); - } + let tree = build_tree_with_cache::(&tree, &mut cache); + black_box(tree); + }) + }); + + group.bench_function("without static text", |b| { + b.iter(|| { + let tree = build_tree_with_cache::(&tree, &mut cache); + black_box(tree); }) }); diff --git a/examples/math.rs b/examples/math.rs index 6ed2370..ad0ce2d 100644 --- a/examples/math.rs +++ b/examples/math.rs @@ -55,6 +55,16 @@ impl cstree::Language for Lang { fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { kind.into() } + + fn static_text(kind: Self::Kind) -> Option<&'static str> { + match kind { + Add => Some("+"), + Sub => Some("-"), + Mul => Some("*"), + Div => Some("/"), + _ => None, + } + } } type SyntaxNode = cstree::SyntaxNode; @@ -65,7 +75,7 @@ type SyntaxElement = cstree::NodeOrToken; type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>; struct Parser<'input, I: Iterator> { - builder: GreenNodeBuilder<'static, 'static>, + builder: GreenNodeBuilder<'static, 'static, Lang>, iter: Peekable, } impl<'input, I: Iterator> Parser<'input, I> { @@ -78,7 +88,7 @@ impl<'input, I: Iterator> Parser<'input, I> { fn bump(&mut self) { if let Some((token, string)) = self.iter.next() { - self.builder.token(token.into(), string); + self.builder.token(token, string); } } @@ -86,7 +96,7 @@ impl<'input, I: Iterator> Parser<'input, I> { match self.peek() { Some(Number) => self.bump(), _ => { - self.builder.start_node(Error.into()); + self.builder.start_node(Error); self.bump(); self.builder.finish_node(); } @@ -97,7 +107,7 @@ impl<'input, I: Iterator> Parser<'input, I> { let checkpoint = self.builder.checkpoint(); next(self); while self.peek().map(|t| tokens.contains(&t)).unwrap_or(false) { - self.builder.start_node_at(checkpoint, Operation.into()); + self.builder.start_node_at(checkpoint, Operation); self.bump(); next(self); self.builder.finish_node(); @@ -113,7 +123,7 @@ impl<'input, I: Iterator> Parser<'input, I> { } 
fn parse(mut self) -> (SyntaxNode, impl Resolver) { - self.builder.start_node(Root.into()); + self.builder.start_node(Root); self.parse_add(); self.builder.finish_node(); diff --git a/examples/s_expressions.rs b/examples/s_expressions.rs index 6883547..dbe8128 100644 --- a/examples/s_expressions.rs +++ b/examples/s_expressions.rs @@ -52,6 +52,14 @@ impl cstree::Language for Lang { fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { kind.into() } + + fn static_text(kind: Self::Kind) -> Option<&'static str> { + match kind { + LParen => Some("("), + RParen => Some(")"), + _ => None, + } + } } /// GreenNode is an immutable tree, which caches identical nodes and tokens, but doesn't contain @@ -60,7 +68,7 @@ impl cstree::Language for Lang { /// the Resolver to get the real text back from the interned representation. use cstree::{ interning::{IntoResolver, Resolver}, - GreenNode, + GreenNode, Language, }; /// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains @@ -84,7 +92,7 @@ fn parse(text: &str) -> Parse { /// input tokens, including whitespace. tokens: VecDeque<(SyntaxKind, &'input str)>, /// the in-progress green tree. - builder: GreenNodeBuilder<'static, 'static>, + builder: GreenNodeBuilder<'static, 'static, Lang>, /// the list of syntax errors we've accumulated so far. 
errors: Vec, } @@ -102,13 +110,13 @@ fn parse(text: &str) -> Parse { impl Parser<'_> { fn parse(mut self) -> Parse { // Make sure that the root node covers all source - self.builder.start_node(Root.into()); + self.builder.start_node(Root); // Parse zero or more S-expressions loop { match self.sexp() { SexpRes::Eof => break, SexpRes::RParen => { - self.builder.start_node(Error.into()); + self.builder.start_node(Error); self.errors.push("unmatched `)`".to_string()); self.bump(); // be sure to advance even in case of an error, so as to not get stuck self.builder.finish_node(); @@ -135,7 +143,7 @@ fn parse(text: &str) -> Parse { fn list(&mut self) { assert_eq!(self.current(), Some(LParen)); // Start the list node - self.builder.start_node(List.into()); + self.builder.start_node(List); self.bump(); // '(' loop { match self.sexp() { @@ -166,7 +174,7 @@ fn parse(text: &str) -> Parse { match t { LParen => self.list(), Word => { - self.builder.start_node(Atom.into()); + self.builder.start_node(Atom); self.bump(); self.builder.finish_node(); } @@ -179,7 +187,7 @@ fn parse(text: &str) -> Parse { /// Advance one token, adding it to the current branch of the tree builder. 
fn bump(&mut self) { let (kind, text) = self.tokens.pop_front().unwrap(); - self.builder.token(kind.into(), text); + self.builder.token(kind, text); } /// Peek at the first unprocessed token @@ -348,7 +356,9 @@ impl ast::Atom { fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str { match &self.0.green().children().next() { - Some(cstree::NodeOrToken::Token(token)) => token.text(resolver), + Some(cstree::NodeOrToken::Token(token)) => Lang::static_text(Lang::kind_from_raw(token.kind())) + .or_else(|| token.text(resolver)) + .unwrap(), _ => unreachable!(), } } diff --git a/src/green.rs b/src/green.rs index 57b3e98..ac5cf50 100644 --- a/src/green.rs +++ b/src/green.rs @@ -26,12 +26,12 @@ pub struct SyntaxKind(pub u16); #[cfg(test)] mod tests { + use super::*; use node::GreenNodeHead; use token::GreenTokenData; - use super::*; - #[test] + #[cfg_attr(miri, ignore)] fn assert_send_sync() { fn f() {} f::(); @@ -41,6 +41,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] #[rustfmt::skip] fn assert_green_sizes() { use std::mem::size_of; diff --git a/src/green/builder.rs b/src/green/builder.rs index 3b374fc..dcfb939 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -1,15 +1,13 @@ -use std::{ - convert::TryFrom, - hash::{Hash, Hasher}, -}; +use std::hash::{Hash, Hasher}; use fxhash::{FxHashMap, FxHasher32}; use text_size::TextSize; use crate::{ green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind}, - interning::Interner, - NodeOrToken, + interning::{Interner, Key}, + utility_types::MaybeOwned, + Language, NodeOrToken, }; use super::{node::GreenNodeHead, token::GreenTokenData}; @@ -36,20 +34,20 @@ impl NodeCache<'static> { /// tokens. To re-use an existing interner, see [`with_interner`](NodeCache::with_interner). 
/// # Examples /// ``` - /// # use cstree::*; - /// # const ROOT: SyntaxKind = SyntaxKind(0); - /// # const INT: SyntaxKind = SyntaxKind(1); - /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// # use cstree::testing::{*, Language as _}; + /// // Build a tree /// let mut cache = NodeCache::new(); - /// let mut builder = GreenNodeBuilder::with_cache(&mut cache); - /// # builder.start_node(ROOT); - /// # builder.token(INT, "42"); + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_cache(&mut cache); + /// # builder.start_node(Root); + /// # builder.token(Int, "42"); /// # builder.finish_node(); /// parse(&mut builder, "42"); /// let (tree, _) = builder.finish(); - /// assert_eq!(tree.kind(), ROOT); + /// + /// // Check it out! + /// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root)); /// let int = tree.children().next().unwrap(); - /// assert_eq!(int.kind(), INT); + /// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int)); /// ``` pub fn new() -> Self { Self { @@ -74,23 +72,26 @@ where /// (strings) across tokens. 
/// # Examples /// ``` - /// # use cstree::*; + /// # use cstree::testing::{*, Language as _}; /// use lasso::Rodeo; - /// # const ROOT: SyntaxKind = SyntaxKind(0); - /// # const INT: SyntaxKind = SyntaxKind(1); - /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// + /// // Create the builder from a custom `Rodeo` /// let mut interner = Rodeo::new(); /// let mut cache = NodeCache::with_interner(&mut interner); - /// let mut builder = GreenNodeBuilder::with_cache(&mut cache); - /// # builder.start_node(ROOT); - /// # builder.token(INT, "42"); + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_cache(&mut cache); + /// + /// // Construct the tree + /// # builder.start_node(Root); + /// # builder.token(Int, "42"); /// # builder.finish_node(); /// parse(&mut builder, "42"); /// let (tree, _) = builder.finish(); - /// assert_eq!(tree.kind(), ROOT); + /// + /// // Use the tree + /// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root)); /// let int = tree.children().next().unwrap(); - /// assert_eq!(int.kind(), INT); - /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); + /// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int)); + /// assert_eq!(int.as_token().unwrap().text(&interner), Some("42")); /// ``` #[inline] pub fn with_interner(interner: &'i mut I) -> Self { @@ -105,24 +106,27 @@ where /// (strings) across tokens. 
/// # Examples /// ``` - /// # use cstree::*; + /// # use cstree::testing::{*, Language as _}; /// use lasso::Rodeo; - /// # const ROOT: SyntaxKind = SyntaxKind(0); - /// # const INT: SyntaxKind = SyntaxKind(1); - /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// + /// // Create the builder from a custom `Rodeo` /// let mut interner = Rodeo::new(); /// let cache = NodeCache::from_interner(interner); - /// let mut builder = GreenNodeBuilder::from_cache(cache); - /// # builder.start_node(ROOT); - /// # builder.token(INT, "42"); + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::from_cache(cache); + /// + /// // Construct the tree + /// # builder.start_node(Root); + /// # builder.token(Int, "42"); /// # builder.finish_node(); /// parse(&mut builder, "42"); /// let (tree, cache) = builder.finish(); + /// + /// // Use the tree /// let interner = cache.unwrap().into_interner().unwrap(); - /// assert_eq!(tree.kind(), ROOT); + /// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root)); /// let int = tree.children().next().unwrap(); - /// assert_eq!(int.kind(), INT); - /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); + /// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int)); + /// assert_eq!(int.as_token().unwrap().text(&interner), Some("42")); /// ``` #[inline] pub fn from_interner(interner: I) -> Self { @@ -165,10 +169,12 @@ where self.interner.into_owned() } - fn node(&mut self, kind: SyntaxKind, children: &[GreenElement]) -> GreenNode { + fn node(&mut self, kind: L::Kind, all_children: &mut Vec, offset: usize) -> GreenNode { + // NOTE: this fn must remove all children starting at `first_child` from `all_children` before returning + let kind = L::kind_to_raw(kind); let mut hasher = FxHasher32::default(); let mut text_len: TextSize = 0.into(); - for child in children { + for child in &all_children[offset..] 
{ text_len += child.text_len(); child.hash(&mut hasher); } @@ -181,20 +187,26 @@ where // For example, all `#[inline]` in this file share the same green node! // For `libsyntax/parse/parser.rs`, measurements show that deduping saves // 17% of the memory for green nodes! + let children = all_children.drain(offset..); if children.len() <= CHILDREN_CACHE_THRESHOLD { self.get_cached_node(kind, children, text_len, child_hash) } else { - GreenNode::new_with_len_and_hash(kind, children.iter().cloned(), text_len, child_hash) + GreenNode::new_with_len_and_hash(kind, children, text_len, child_hash) } } + #[inline(always)] + fn intern(&mut self, text: &str) -> Key { + self.interner.get_or_intern(text) + } + /// Creates a [`GreenNode`] by looking inside the cache or inserting /// a new node into the cache if it's a cache miss. #[inline] fn get_cached_node( &mut self, kind: SyntaxKind, - children: &[GreenElement], + children: std::vec::Drain<'_, GreenElement>, text_len: TextSize, child_hash: u32, ) -> GreenNode { @@ -205,13 +217,13 @@ where }; self.nodes .entry(head) - .or_insert_with_key(|head| GreenNode::from_head_and_children(head.clone(), children.iter().cloned())) + .or_insert_with_key(|head| GreenNode::from_head_and_children(head.clone(), children)) .clone() } - fn token(&mut self, kind: SyntaxKind, text: &str) -> GreenToken { - let text_len = TextSize::try_from(text.len()).unwrap(); - let text = self.interner.get_or_intern(text); + fn token(&mut self, kind: L::Kind, text: Option, len: u32) -> GreenToken { + let text_len = TextSize::from(len); + let kind = L::kind_to_raw(kind); let data = GreenTokenData { kind, text, text_len }; self.tokens .entry(data) @@ -220,47 +232,6 @@ where } } -#[derive(Debug)] -enum MaybeOwned<'a, T> { - Owned(T), - Borrowed(&'a mut T), -} - -impl MaybeOwned<'_, T> { - fn into_owned(self) -> Option { - match self { - MaybeOwned::Owned(owned) => Some(owned), - MaybeOwned::Borrowed(_) => None, - } - } -} - -impl std::ops::Deref for MaybeOwned<'_, T> 
{ - type Target = T; - - fn deref(&self) -> &T { - match self { - MaybeOwned::Owned(it) => it, - MaybeOwned::Borrowed(it) => *it, - } - } -} - -impl std::ops::DerefMut for MaybeOwned<'_, T> { - fn deref_mut(&mut self) -> &mut T { - match self { - MaybeOwned::Owned(it) => it, - MaybeOwned::Borrowed(it) => *it, - } - } -} - -impl Default for MaybeOwned<'_, T> { - fn default() -> Self { - MaybeOwned::Owned(T::default()) - } -} - /// A checkpoint for maybe wrapping a node. See [`GreenNodeBuilder::checkpoint`] for details. #[derive(Clone, Copy, Debug)] pub struct Checkpoint(usize); @@ -274,28 +245,30 @@ pub struct Checkpoint(usize); /// /// # Examples /// ``` -/// # use cstree::{*, interning::IntoResolver}; -/// # const ROOT: SyntaxKind = SyntaxKind(0); -/// # const INT: SyntaxKind = SyntaxKind(1); -/// let mut builder = GreenNodeBuilder::new(); -/// builder.start_node(ROOT); -/// builder.token(INT, "42"); +/// # use cstree::testing::{*, Language as _}; +/// # use cstree::interning::IntoResolver; +/// // Build a tree +/// let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); +/// builder.start_node(Root); +/// builder.token(Int, "42"); /// builder.finish_node(); /// let (tree, cache) = builder.finish(); -/// assert_eq!(tree.kind(), ROOT); +/// +/// // Check it out! 
+/// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root)); /// let int = tree.children().next().unwrap(); -/// assert_eq!(int.kind(), INT); +/// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int)); /// let resolver = cache.unwrap().into_interner().unwrap().into_resolver(); -/// assert_eq!(int.as_token().unwrap().text(&resolver), "42"); +/// assert_eq!(int.as_token().unwrap().text(&resolver), Some("42")); /// ``` #[derive(Debug)] -pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> { +pub struct GreenNodeBuilder<'cache, 'interner, L: Language, I = TokenInterner> { cache: MaybeOwned<'cache, NodeCache<'interner, I>>, - parents: Vec<(SyntaxKind, usize)>, + parents: Vec<(L::Kind, usize)>, children: Vec, } -impl GreenNodeBuilder<'static, 'static> { +impl GreenNodeBuilder<'static, 'static, L> { /// Creates new builder with an empty [`NodeCache`]. pub fn new() -> Self { Self { @@ -306,14 +279,15 @@ impl GreenNodeBuilder<'static, 'static> { } } -impl Default for GreenNodeBuilder<'static, 'static> { +impl Default for GreenNodeBuilder<'static, 'static, L> { fn default() -> Self { Self::new() } } -impl<'cache, 'interner, I> GreenNodeBuilder<'cache, 'interner, I> +impl<'cache, 'interner, L, I> GreenNodeBuilder<'cache, 'interner, L, I> where + L: Language, I: Interner, { /// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally @@ -331,22 +305,24 @@ where /// The `cache` given will be returned on [`finish`](GreenNodeBuilder::finish). 
/// # Examples /// ``` - /// # use cstree::*; - /// # const ROOT: SyntaxKind = SyntaxKind(0); - /// # const INT: SyntaxKind = SyntaxKind(1); - /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// # use cstree::testing::{*, Language as _}; + /// // Construct a builder from our own cache /// let cache = NodeCache::new(); - /// let mut builder = GreenNodeBuilder::from_cache(cache); - /// # builder.start_node(ROOT); - /// # builder.token(INT, "42"); + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::from_cache(cache); + /// + /// // Build a tree + /// # builder.start_node(Root); + /// # builder.token(Int, "42"); /// # builder.finish_node(); /// parse(&mut builder, "42"); /// let (tree, cache) = builder.finish(); + /// + /// // Use the tree /// let interner = cache.unwrap().into_interner().unwrap(); - /// assert_eq!(tree.kind(), ROOT); + /// assert_eq!(tree.kind(), MyLanguage::kind_to_raw(Root)); /// let int = tree.children().next().unwrap(); - /// assert_eq!(int.kind(), INT); - /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); + /// assert_eq!(int.kind(), MyLanguage::kind_to_raw(Int)); + /// assert_eq!(int.as_token().unwrap().text(&interner), Some("42")); /// ``` pub fn from_cache(cache: NodeCache<'interner, I>) -> Self { Self { @@ -390,9 +366,9 @@ where /// This is the same interner as used by the underlying [`NodeCache`]. /// # Examples /// ``` - /// # use cstree::*; + /// # use cstree::testing::*; /// # use cstree::interning::*; - /// let mut builder = GreenNodeBuilder::new(); + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); /// let interner = builder.interner_mut(); /// let key = interner.get_or_intern("foo"); /// assert_eq!(interner.resolve(&key), "foo"); @@ -402,16 +378,34 @@ where &mut *self.cache.interner } - /// Add new token to the current branch. + /// Add a new token to the current branch without storing an explicit section of text. 
+ /// This is useful if the text can always be inferred from the token's `kind`, for example + /// when using kinds for specific operators or punctuation. + /// + /// ## Panics + /// In debug mode, if `kind` has static text, this function will verify that `text` matches that text. #[inline] - pub fn token(&mut self, kind: SyntaxKind, text: &str) { - let token = self.cache.token(kind, text); + pub fn token(&mut self, kind: L::Kind, text: &str) { + let token = match L::static_text(kind) { + Some(static_text) => { + debug_assert_eq!( + static_text, text, + r#"Received `{kind:?}` token which should have text "{static_text}", but "{text}" was given."# + ); + self.cache.token::(kind, None, static_text.len() as u32) + } + None => { + let len = text.len() as u32; + let text = self.cache.intern(text); + self.cache.token::(kind, Some(text), len) + } + }; self.children.push(token.into()); } /// Start new node of the given `kind` and make it current. #[inline] - pub fn start_node(&mut self, kind: SyntaxKind) { + pub fn start_node(&mut self, kind: L::Kind) { let len = self.children.len(); self.parents.push((kind, len)); } @@ -420,8 +414,8 @@ where #[inline] pub fn finish_node(&mut self) { let (kind, first_child) = self.parents.pop().unwrap(); - let node = self.cache.node(kind, &self.children[first_child..]); - self.children.truncate(first_child); + // NOTE: we rely on the node cache to remove all children starting at `first_child` from `self.children` + let node = self.cache.node::(kind, &mut self.children, first_child); self.children.push(node.into()); } @@ -432,21 +426,20 @@ where /// /// # Examples /// ``` - /// # use cstree::{GreenNodeBuilder, SyntaxKind}; - /// # const PLUS: SyntaxKind = SyntaxKind(0); - /// # const OPERATION: SyntaxKind = SyntaxKind(1); + /// # use cstree::testing::*; + /// # use cstree::{GreenNodeBuilder, Language}; /// # struct Parser; /// # impl Parser { - /// # fn peek(&self) -> Option { None } + /// # fn peek(&self) -> Option { None } /// # fn
parse_expr(&mut self) {} /// # } - /// # let mut builder = GreenNodeBuilder::new(); + /// # let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); /// # let mut parser = Parser; /// let checkpoint = builder.checkpoint(); /// parser.parse_expr(); - /// if parser.peek() == Some(PLUS) { + /// if let Some(Plus) = parser.peek() { /// // 1 + 2 = Add(1, 2) - /// builder.start_node_at(checkpoint, OPERATION); + /// builder.start_node_at(checkpoint, Operation); /// parser.parse_expr(); /// builder.finish_node(); /// } @@ -459,7 +452,7 @@ where /// Wrap the previous branch marked by [`checkpoint`](GreenNodeBuilder::checkpoint) in a new /// branch and make it current. #[inline] - pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) { + pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: L::Kind) { let Checkpoint(checkpoint) = checkpoint; assert!( checkpoint <= self.children.len(), diff --git a/src/green/token.rs b/src/green/token.rs index e64be2a..e58ceef 100644 --- a/src/green/token.rs +++ b/src/green/token.rs @@ -1,4 +1,4 @@ -use std::{fmt, hash, mem::ManuallyDrop, ptr}; +use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull}; use crate::{ green::SyntaxKind, @@ -12,13 +12,13 @@ use triomphe::Arc; #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] pub(super) struct GreenTokenData { pub(super) kind: SyntaxKind, - pub(super) text: Key, + pub(super) text: Option, pub(super) text_len: TextSize, } /// Leaf node in the immutable "green" tree. 
pub struct GreenToken { - ptr: ptr::NonNull, + ptr: NonNull, } unsafe impl Send for GreenToken {} // where GreenTokenData: Send + Sync @@ -26,17 +26,17 @@ unsafe impl Sync for GreenToken {} // where GreenTokenData: Send + Sync pub(super) const IS_TOKEN_TAG: usize = 0x1; impl GreenToken { - fn add_tag(ptr: ptr::NonNull) -> ptr::NonNull { + fn add_tag(ptr: NonNull) -> NonNull { unsafe { let ptr = ptr.as_ptr().map_addr(|addr| addr | IS_TOKEN_TAG); - ptr::NonNull::new_unchecked(ptr) + NonNull::new_unchecked(ptr) } } - fn remove_tag(ptr: ptr::NonNull) -> ptr::NonNull { + fn remove_tag(ptr: NonNull) -> NonNull { unsafe { let ptr = ptr.as_ptr().map_addr(|addr| addr & !IS_TOKEN_TAG); - ptr::NonNull::new_unchecked(ptr) + NonNull::new_unchecked(ptr) } } @@ -48,7 +48,7 @@ impl GreenToken { #[inline] pub(super) fn new(data: GreenTokenData) -> GreenToken { let ptr = Arc::into_raw(Arc::new(data)); - let ptr = ptr::NonNull::new(ptr as *mut _).unwrap(); + let ptr = NonNull::new(ptr as *mut _).unwrap(); GreenToken { ptr: Self::add_tag(ptr), } @@ -62,11 +62,11 @@ impl GreenToken { /// The original source text of this Token. #[inline] - pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str + pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str> where I: Resolver + ?Sized, { - resolver.resolve(&self.data().text) + self.data().text.map(|key| resolver.resolve(&key)) } /// Returns the length of text covered by this token. @@ -80,7 +80,7 @@ impl GreenToken { /// /// See also [`text`](GreenToken::text). 
#[inline] - pub fn text_key(&self) -> Key { + pub fn text_key(&self) -> Option { self.data().text } } @@ -102,7 +102,7 @@ impl Clone for GreenToken { let arc = ManuallyDrop::new(Arc::from_raw(ptr.as_ptr())); Arc::into_raw(Arc::clone(&arc)) }; - let ptr = unsafe { ptr::NonNull::new_unchecked(ptr as *mut _) }; + let ptr = unsafe { NonNull::new_unchecked(ptr as *mut _) }; GreenToken { ptr: Self::add_tag(ptr), } diff --git a/src/lib.rs b/src/lib.rs index 277f258..1200339 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,18 +71,21 @@ pub use crate::{ }; pub use triomphe::Arc; -/// The `Language` trait is the bridge between the internal `cstree` representation and your language -/// types. -/// This is essential to providing a [`SyntaxNode`] API that can be used with your types, as in the +/// The `Language` trait is the bridge between the internal `cstree` representation and your +/// language's types. +/// This is essential for providing a [`SyntaxNode`] API that can be used with your types, as in the /// `s_expressions` example: +/// /// ``` /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// # #[allow(non_camel_case_types)] /// #[repr(u16)] /// enum SyntaxKind { -/// ROOT, // top-level node -/// ATOM, // `+`, `15` -/// WHITESPACE, // whitespaces is explicit +/// Plus, // `+` +/// Minus, // `-` +/// Integer, // like `15` +/// Expression, // combined expression, like `5 + 4 - 3` +/// Whitespace, // whitespaces is explicit /// #[doc(hidden)] /// __LAST, /// } @@ -102,16 +105,75 @@ pub use triomphe::Arc; /// fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { /// cstree::SyntaxKind(kind as u16) /// } +/// +/// fn static_text(kind: Self::Kind) -> Option<&'static str> { +/// match kind { +/// Plus => Some("+"), +/// Minus => Some("-"), +/// _ => None, +/// } +/// } /// } /// ``` pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash { /// A type that represents what items in your Language can be. 
/// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ... - type Kind: fmt::Debug; + type Kind: Sized + Clone + Copy + fmt::Debug; /// Construct a semantic item kind from the compact representation. fn kind_from_raw(raw: SyntaxKind) -> Self::Kind; /// Convert a semantic item kind into a more compact representation. fn kind_to_raw(kind: Self::Kind) -> SyntaxKind; + + /// Fixed text for a particular syntax kind. + /// + /// Implement for kinds that will only ever represent the same text, such as punctuation (like a + /// semicolon), keywords (like `fn`), or operators (like `<=`). + fn static_text(kind: Self::Kind) -> Option<&'static str>; +} + +#[doc(hidden)] +#[allow(unsafe_code, unused)] +pub mod testing { + pub use crate::*; + pub fn parse(_b: &mut super::GreenNodeBuilder, _s: &str) {} + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + #[repr(u16)] + #[allow(non_camel_case_types)] + pub enum TestSyntaxKind { + Plus, + Identifier, + Int, + Float, + Operation, + Root, + Whitespace, + __LAST, + } + pub use TestSyntaxKind::*; + + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum TestLang {} + pub type MyLanguage = TestLang; + impl Language for TestLang { + type Kind = TestSyntaxKind; + + fn kind_from_raw(raw: SyntaxKind) -> Self::Kind { + assert!(raw.0 <= TestSyntaxKind::__LAST as u16); + unsafe { std::mem::transmute::(raw.0) } + } + + fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { + SyntaxKind(kind as u16) + } + + fn static_text(kind: Self::Kind) -> Option<&'static str> { + match kind { + TestSyntaxKind::Plus => Some("+"), + _ => None, + } + } + } } diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 31413ce..204c3a4 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -177,16 +177,16 @@ where where A: SeqAccess<'de>, { - let mut builder = GreenNodeBuilder::new(); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); let mut data_indices = VecDeque::new(); while let 
Some(next) = seq.next_element::>()? { match next { Event::EnterNode(kind, has_data) => { - builder.start_node(kind); + builder.start_node(L::kind_from_raw(kind)); data_indices.push_back(has_data); } - Event::Token(kind, text) => builder.token(kind, text), + Event::Token(kind, text) => builder.token(L::kind_from_raw(kind), text), Event::LeaveNode => builder.finish_node(), } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index df6e19a..9d6dd4a 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -33,3 +33,34 @@ pub use text::SyntaxText; // this. // // - DQ 01/2021 + +#[cfg(test)] +mod tests { + use crate::testing::*; + + #[test] + #[cfg_attr(miri, ignore)] + fn assert_send_sync() { + fn f() {} + f::>(); + f::>(); + f::>(); + f::>(); + + f::>(); + f::>(); + f::>(); + f::>(); + } + + #[test] + #[cfg_attr(miri, ignore)] + #[rustfmt::skip] + fn assert_syntax_sizes() { + use std::mem::size_of; + + assert_eq!(size_of::>(), size_of::<*const u8>()); + assert_eq!(size_of::>>(), size_of::<*const u8>()); // verify niche opt of `NonNull` + assert_eq!(size_of::>(), size_of::<*const u8>() + size_of::() * 2); + } +} diff --git a/src/syntax/node.rs b/src/syntax/node.rs index 5452d3e..64eba62 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -11,7 +11,8 @@ use std::{ cell::UnsafeCell, fmt, hash::{Hash, Hasher}, - iter, ptr, + iter, + ptr::{self, NonNull}, sync::{ atomic::{AtomicU32, Ordering}, Arc as StdArc, @@ -26,7 +27,7 @@ use triomphe::Arc; #[derive(Debug)] #[repr(transparent)] pub struct SyntaxNode { - data: *mut NodeData, + data: NonNull>, } unsafe impl Send for SyntaxNode {} @@ -158,7 +159,7 @@ impl Drop for SyntaxNode { root.drop_recursive(); let root_data = root.data; drop(root); - unsafe { drop(Box::from_raw(root_data)) }; + unsafe { drop(Box::from_raw(root_data.as_ptr())) }; unsafe { drop(Box::from_raw(ref_count)) }; } } @@ -167,7 +168,7 @@ impl Drop for SyntaxNode { impl SyntaxNode { #[inline] fn data(&self) -> &NodeData { - unsafe { &*self.data } + 
unsafe { self.data.as_ref() } } #[inline] @@ -209,7 +210,7 @@ impl SyntaxNode { // safety: since there are no more `parent` pointers from the children of the // node this data belonged to, and we have just dropped the node, there are now // no more references to `data` - let data = unsafe { Box::from_raw(data) }; + let data = unsafe { Box::from_raw(data.as_ptr()) }; drop(data); } } @@ -227,7 +228,7 @@ impl Eq for SyntaxNode {} impl Hash for SyntaxNode { fn hash(&self, state: &mut H) { - ptr::hash(self.data, state); + self.data.hash(state); } } @@ -251,7 +252,7 @@ impl Kind { pub(super) struct NodeData { kind: Kind, - green: ptr::NonNull, + green: NonNull, ref_count: *mut AtomicU32, data: RwLock>>, children: Vec>>>, @@ -259,24 +260,21 @@ pub(super) struct NodeData { } impl NodeData { - fn new( - kind: Kind, - green: ptr::NonNull, - ref_count: *mut AtomicU32, - n_children: usize, - ) -> *mut Self { + fn new(kind: Kind, green: NonNull, ref_count: *mut AtomicU32, n_children: usize) -> NonNull { let mut children = Vec::with_capacity(n_children); let mut child_locks = Vec::with_capacity(n_children); children.extend((0..n_children).map(|_| Default::default())); child_locks.extend((0..n_children).map(|_| Default::default())); - Box::into_raw(Box::new(Self { + let ptr = Box::into_raw(Box::new(Self { kind, green, ref_count, data: RwLock::default(), children, child_locks, - })) + })); + // safety: guaranteed by `Box::into_raw` + unsafe { NonNull::new_unchecked(ptr) } } } @@ -285,41 +283,20 @@ impl SyntaxNode { /// /// # Example /// ``` - /// # use cstree::*; - /// # #[allow(non_camel_case_types)] - /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// #[repr(u16)] - /// enum SyntaxKind { - /// ROOT, - /// } - /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// enum Lang {} - /// impl cstree::Language for Lang { - /// // ... 
- /// # type Kind = SyntaxKind; - /// # - /// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind { - /// # assert!(raw.0 <= SyntaxKind::ROOT as u16); - /// # unsafe { std::mem::transmute::(raw.0) } - /// # } - /// # - /// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { - /// # cstree::SyntaxKind(kind as u16) - /// # } - /// } - /// # let mut builder = GreenNodeBuilder::new(); - /// # builder.start_node(SyntaxKind(0)); + /// # use cstree::testing::*; + /// # let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + /// # builder.start_node(Root); /// # builder.finish_node(); - /// # let (green, _) = builder.finish(); - /// let root: SyntaxNode = SyntaxNode::new_root(green); - /// assert_eq!(root.kind(), SyntaxKind::ROOT); + /// # let (green_root, _) = builder.finish(); + /// let root: SyntaxNode = SyntaxNode::new_root(green_root); + /// assert_eq!(root.kind(), Root); /// ``` #[inline] pub fn new_root(green: GreenNode) -> Self { Self::make_new_root(green, None) } - pub(super) fn new(data: *mut NodeData) -> Self { + fn new(data: NonNull>) -> Self { Self { data } } @@ -328,12 +305,12 @@ impl SyntaxNode { let n_children = green.children().count(); let data = NodeData::new( Kind::Root(green, resolver), - ptr::NonNull::dangling(), + NonNull::dangling(), Box::into_raw(ref_count), n_children, ); let ret = Self::new(data); - let green: ptr::NonNull = match &ret.data().kind { + let green: NonNull = match &ret.data().kind { Kind::Root(green, _resolver) => green.into(), _ => unreachable!(), }; @@ -341,7 +318,7 @@ impl SyntaxNode { // Also, we use `addr_of_mut` here in order to not have to go through a `&mut *ret.data`, // which would invalidate the reading provenance of `green`, since `green` is contained in // the date once we have written it here. 
- unsafe { ptr::addr_of_mut!((*ret.data).green).write(green) }; + unsafe { ptr::addr_of_mut!((*ret.data.as_ptr()).green).write(green) }; ret } @@ -350,39 +327,18 @@ impl SyntaxNode { /// /// # Example /// ``` - /// # use cstree::*; - /// # #[allow(non_camel_case_types)] - /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// #[repr(u16)] - /// enum SyntaxKind { - /// TOKEN, - /// ROOT, - /// } - /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// enum Lang {} - /// impl cstree::Language for Lang { - /// // ... - /// # type Kind = SyntaxKind; - /// # - /// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind { - /// # assert!(raw.0 <= SyntaxKind::ROOT as u16); - /// # unsafe { std::mem::transmute::(raw.0) } - /// # } - /// # - /// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { - /// # cstree::SyntaxKind(kind as u16) - /// # } - /// } - /// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0); - /// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1); - /// # type SyntaxNode = cstree::SyntaxNode; - /// let mut builder = GreenNodeBuilder::new(); - /// builder.start_node(ROOT); - /// builder.token(TOKEN, "content"); + /// # use cstree::testing::*; + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + /// builder.start_node(Root); + /// builder.token(Identifier, "content"); /// builder.finish_node(); /// let (green, cache) = builder.finish(); - /// let root: ResolvedNode = - /// SyntaxNode::new_root_with_resolver(green, cache.unwrap().into_interner().unwrap()); + /// + /// // We are safe to use `unwrap` here because we created the builder with `new`. + /// // This created a new interner and cache for us owned by the builder, + /// // and `finish` always returns these. 
+ /// let interner = cache.unwrap().into_interner().unwrap(); + /// let root: ResolvedNode = SyntaxNode::new_root_with_resolver(green, interner); /// assert_eq!(root.text(), "content"); /// ``` #[inline] @@ -485,7 +441,7 @@ impl SyntaxNode { ref_count.fetch_add(2, Ordering::AcqRel); let node_data = node.data; drop(node); - unsafe { drop(Box::from_raw(node_data)) }; + unsafe { drop(Box::from_raw(node_data.as_ptr())) }; } SyntaxElement::Token(token) => { // We don't have to worry about `NodeData` or `SyntaxToken`'s own `Drop` here, diff --git a/src/syntax/resolved.rs b/src/syntax/resolved.rs index 906db69..333720b 100644 --- a/src/syntax/resolved.rs +++ b/src/syntax/resolved.rs @@ -198,7 +198,10 @@ impl ResolvedToken { /// Uses the resolver associated with this tree to return the source text of this token. #[inline] pub fn text(&self) -> &str { - self.green().text(&**self.resolver()) + // one of the two must be present upon construction + self.static_text() + .or_else(|| self.green().text(&**self.resolver())) + .unwrap() } } @@ -725,31 +728,3 @@ impl<'a, L: Language, D> ResolvedElementRef<'a, L, D> { } } } - -#[test] -fn assert_send_sync() { - use crate::SyntaxKind; - - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] - enum L {} - #[derive(Debug)] - enum Kind { - Var, - } - impl Language for L { - type Kind = Kind; - - fn kind_from_raw(_: SyntaxKind) -> Self::Kind { - Kind::Var - } - - fn kind_to_raw(_: Self::Kind) -> SyntaxKind { - SyntaxKind(0) - } - } - fn f() {} - f::>(); - f::>(); - f::>(); - f::>(); -} diff --git a/src/syntax/text.rs b/src/syntax/text.rs index 7b577e6..712ce30 100644 --- a/src/syntax/text.rs +++ b/src/syntax/text.rs @@ -13,43 +13,21 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T /// /// # Example /// ``` -/// # use cstree::{*, interning::IntoResolver}; -/// # #[allow(non_camel_case_types)] -/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -/// # #[repr(u16)] -/// 
# enum SyntaxKind { -/// # TOKEN, -/// # ROOT, -/// # } -/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -/// # enum Lang {} -/// # impl cstree::Language for Lang { -/// # type Kind = SyntaxKind; +/// # use cstree::testing::*; +/// # use cstree::interning::IntoResolver; /// # -/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind { -/// # assert!(raw.0 <= SyntaxKind::ROOT as u16); -/// # unsafe { std::mem::transmute::(raw.0) } -/// # } -/// # -/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { -/// # cstree::SyntaxKind(kind as u16) -/// # } -/// # } -/// # type SyntaxNode = cstree::SyntaxNode; -/// # type ResolvedNode = cstree::ResolvedNode; -/// # -/// # fn parse_float_literal(s: &str) -> ResolvedNode { -/// # const LITERAL: cstree::SyntaxKind = cstree::SyntaxKind(0); -/// # let mut builder = GreenNodeBuilder::new(); -/// # builder.start_node(LITERAL); -/// # builder.token(LITERAL, s); +/// fn parse_float_literal(s: &str) -> ResolvedNode { +/// // parsing... 
+/// # let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); +/// # builder.start_node(Float); +/// # builder.token(Float, s); /// # builder.finish_node(); /// # let (root, cache) = builder.finish(); /// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver(); /// # SyntaxNode::new_root_with_resolver(root, resolver) -/// # } -/// let node = parse_float_literal("2.748E2"); -/// let text = node.text(); +/// } +/// let float_node = parse_float_literal("2.748E2"); +/// let text = float_node.text(); /// assert_eq!(text.len(), 7.into()); /// assert!(text.contains_char('E')); /// assert_eq!(text.find_char('E'), Some(5.into())); @@ -412,13 +390,28 @@ mod tests { fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { kind } + + fn static_text(kind: Self::Kind) -> Option<&'static str> { + if kind == SyntaxKind(1) { + Some("{") + } else if kind == SyntaxKind(2) { + Some("}") + } else { + None + } + } } fn build_tree(chunks: &[&str]) -> (SyntaxNode, impl Resolver) { - let mut builder = GreenNodeBuilder::new(); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); builder.start_node(SyntaxKind(62)); for &chunk in chunks.iter() { - builder.token(SyntaxKind(92), chunk); + let kind = match chunk { + "{" => 1, + "}" => 2, + _ => 3, + }; + builder.token(SyntaxKind(kind), chunk); } builder.finish_node(); let (node, cache) = builder.finish(); diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 800c21d..661a4d3 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -170,66 +170,115 @@ impl SyntaxToken { } /// Uses the provided resolver to return the source text of this token. + /// + /// If no text is explicitly associated with the token, returns its [`static_text`](SyntaxToken::static_text) + /// instead. 
#[inline] pub fn resolve_text<'i, I>(&self, resolver: &'i I) -> &'i str where I: Resolver + ?Sized, { - self.green().text(resolver) + // one of the two must be present upon construction + self.static_text().or_else(|| self.green().text(resolver)).unwrap() + } + + /// If the [syntax kind](Language::Kind) of this token always represents the same text, returns + /// that text. + /// + /// # Examples + /// If there is a syntax kind `Plus` that represents just the `+` operator and we implement + /// [`Language::static_text`] for it, we can retrieve this text in the resulting syntax tree. + /// + /// ``` + /// # use cstree::testing::*; + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + /// # builder.start_node(Root); + /// # builder.token(Identifier, "x"); + /// # builder.token(Whitespace, " "); + /// # builder.token(Plus, "+"); + /// # builder.token(Whitespace, " "); + /// # builder.token(Int, "3"); + /// # builder.finish_node(); + /// let tree = parse(&mut builder, "x + 3"); + /// # let tree: SyntaxNode = SyntaxNode::new_root(builder.finish().0); + /// let plus = tree + /// .children_with_tokens() + /// .nth(2) // `x`, then a space, then `+` + /// .unwrap() + /// .into_token() + /// .unwrap(); + /// assert_eq!(plus.static_text(), Some("+")); + /// ``` + #[inline(always)] + pub fn static_text(&self) -> Option<&'static str> { + L::static_text(self.kind()) } /// Returns `true` if `self` and `other` represent equal source text. /// /// This method is different from the `PartialEq` and `Eq` implementations in that it compares - /// the text and not the token position. + /// only the token text and not its source position. /// It is more efficient than comparing the result of /// [`resolve_text`](SyntaxToken::resolve_text) because it compares the tokens' interned - /// [`text_key`s](SyntaxToken::text_key). + /// [`text_key`s](SyntaxToken::text_key) (if their text is not static) or their kind (if it is). 
/// Therefore, it also does not require a [`Resolver`]. + /// /// **Note** that the result of the comparison may be wrong when comparing two tokens from /// different trees that use different interners. + /// + /// # Examples + /// ``` + /// # use cstree::testing::*; + /// let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + /// # builder.start_node(Root); + /// # builder.token(Identifier, "x"); + /// # builder.token(Whitespace, " "); + /// # builder.token(Plus, "+"); + /// # builder.token(Whitespace, " "); + /// # builder.token(Identifier, "x"); + /// # builder.token(Whitespace, " "); + /// # builder.token(Plus, "+"); + /// # builder.token(Int, "3"); + /// # builder.finish_node(); + /// let tree = parse(&mut builder, "x + x + 3"); + /// # let tree: SyntaxNode = SyntaxNode::new_root(builder.finish().0); + /// let mut tokens = tree.children_with_tokens(); + /// let tokens = tokens.by_ref(); + /// let first_x = tokens.next().unwrap().into_token().unwrap(); + /// + /// // For the other tokens, skip over the whitespace between them + /// let first_plus = tokens.skip(1).next().unwrap().into_token().unwrap(); + /// let second_x = tokens.skip(1).next().unwrap().into_token().unwrap(); + /// let second_plus = tokens.skip(1).next().unwrap().into_token().unwrap(); + /// assert!(first_x.text_eq(&second_x)); + /// assert!(first_plus.text_eq(&second_plus)); + /// ``` #[inline] pub fn text_eq(&self, other: &Self) -> bool { - self.text_key() == other.text_key() + if let Some(k1) = self.green().text_key() { + match other.green().text_key() { + Some(k2) => return k1 == k2, + None => return false, // a kind with static text cannot be equal to one with non-static text + } + } + + debug_assert!(self.static_text().is_some()); + debug_assert!(other.static_text().is_some()); + self.syntax_kind() == other.syntax_kind() } - /// Returns the interned key of text covered by this token. + /// Returns the interned key of text covered by this token, if any. 
/// This key may be used for comparisons with other keys of strings interned by the same interner. /// /// See also [`resolve_text`](SyntaxToken::resolve_text) and [`text_eq`](SyntaxToken::text_eq). /// /// # Examples - /// If you intern strings inside of your application, e.g. inside of a compiler, you can use + /// If you intern strings inside of your application, like inside a compiler, you can use /// token's text keys to cross-reference between the syntax tree and the rest of your /// implementation by re-using the interner in both. /// ``` - /// # use cstree::*; - /// # use cstree::interning::{Hasher, Rodeo, Key, new_interner}; - /// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// # #[repr(u16)] - /// # enum SyntaxKind { - /// # ROOT, - /// # INT, - /// # } - /// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - /// # enum Lang {} - /// # impl cstree::Language for Lang { - /// # type Kind = SyntaxKind; - /// # - /// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind { - /// # assert!(raw.0 <= SyntaxKind::INT as u16); - /// # unsafe { std::mem::transmute::(raw.0) } - /// # } - /// # - /// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { - /// # cstree::SyntaxKind(kind as u16) - /// # } - /// # } - /// # type SyntaxNode = cstree::SyntaxNode; - /// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0); - /// # const IDENT: cstree::SyntaxKind = cstree::SyntaxKind(1); - /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} - /// # + /// # use cstree::testing::*; + /// use cstree::interning::{new_interner, Hasher, Key, Rodeo}; /// struct TypeTable { /// // ... 
/// } @@ -243,15 +292,19 @@ impl SyntaxToken { /// # interner: Rodeo, /// # type_table: TypeTable, /// # } - /// # let interner = new_interner(); - /// # let state = &mut State { interner, type_table: TypeTable{} }; - /// let mut builder = GreenNodeBuilder::with_interner(&mut state.interner); + /// let interner = new_interner(); + /// let mut state = State { + /// interner, + /// type_table: TypeTable{ /* stuff */}, + /// }; + /// let mut builder: GreenNodeBuilder = + /// GreenNodeBuilder::with_interner(&mut state.interner); /// # let input = ""; - /// # builder.start_node(ROOT); - /// # builder.token(IDENT, "x"); + /// # builder.start_node(Root); + /// # builder.token(Identifier, "x"); /// # builder.finish_node(); /// let tree = parse(&mut builder, "x"); - /// # let tree = SyntaxNode::::new_root(builder.finish().0); + /// # let tree: SyntaxNode = SyntaxNode::new_root(builder.finish().0); /// let type_table = &state.type_table; /// let ident = tree /// .children_with_tokens() @@ -259,10 +312,10 @@ impl SyntaxToken { /// .unwrap() /// .into_token() /// .unwrap(); - /// let typ = type_table.type_of(ident.text_key()); + /// let typ = type_table.type_of(ident.text_key().unwrap()); /// ``` #[inline] - pub fn text_key(&self) -> Key { + pub fn text_key(&self) -> Option { self.green().text_key() } diff --git a/src/utility_types.rs b/src/utility_types.rs index 88ed89e..77b770c 100644 --- a/src/utility_types.rs +++ b/src/utility_types.rs @@ -88,6 +88,47 @@ impl WalkEvent { } } +#[derive(Debug)] +pub(crate) enum MaybeOwned<'a, T> { + Owned(T), + Borrowed(&'a mut T), +} + +impl MaybeOwned<'_, T> { + pub(crate) fn into_owned(self) -> Option { + match self { + MaybeOwned::Owned(owned) => Some(owned), + MaybeOwned::Borrowed(_) => None, + } + } +} + +impl std::ops::Deref for MaybeOwned<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + match self { + MaybeOwned::Owned(it) => it, + MaybeOwned::Borrowed(it) => *it, + } + } +} + +impl std::ops::DerefMut for MaybeOwned<'_, T> { 
+ fn deref_mut(&mut self) -> &mut T { + match self { + MaybeOwned::Owned(it) => it, + MaybeOwned::Borrowed(it) => *it, + } + } +} + +impl Default for MaybeOwned<'_, T> { + fn default() -> Self { + MaybeOwned::Owned(T::default()) + } +} + /// There might be zero, one or two leaves at a given offset. #[derive(Clone, Debug)] pub enum TokenAtOffset { diff --git a/tests/it/basic.rs b/tests/it/basic.rs index c33e9ee..176198e 100644 --- a/tests/it/basic.rs +++ b/tests/it/basic.rs @@ -3,7 +3,7 @@ use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind, TextRange}; use lasso::{Resolver, Rodeo}; fn build_tree(root: &Element<'_>) -> (SyntaxNode, impl Resolver) { - let mut builder = GreenNodeBuilder::new(); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); build_recursive(root, &mut builder, 0); let (node, cache) = builder.finish(); (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) @@ -178,5 +178,5 @@ fn assert_debug_display() { f::>(); fn dbg() {} - dbg::>(); + dbg::>(); } diff --git a/tests/it/main.rs b/tests/it/main.rs index 4c9511e..78de746 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -35,21 +35,30 @@ impl Language for TestLang { fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { kind } + + fn static_text(_kind: Self::Kind) -> Option<&'static str> { + None + } } pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode where I: Interner, { - let mut builder = GreenNodeBuilder::with_cache(cache); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_cache(cache); build_recursive(root, &mut builder, 0); let (node, cache) = builder.finish(); assert!(cache.is_none()); node } -pub fn build_recursive<'c, 'i, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'c, 'i, I>, mut from: u16) -> u16 +pub fn build_recursive<'c, 'i, L, I>( + root: &Element<'_>, + builder: &mut GreenNodeBuilder<'c, 'i, L, I>, + mut from: u16, +) -> u16 where + L: Language, I: Interner, { match root { diff --git 
a/tests/it/regressions.rs b/tests/it/regressions.rs index bf180de..3ad3cdd 100644 --- a/tests/it/regressions.rs +++ b/tests/it/regressions.rs @@ -24,9 +24,13 @@ fn empty_tree_arc() { fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { cstree::SyntaxKind(kind as u16) } + + fn static_text(_kind: Self::Kind) -> Option<&'static str> { + None + } } - let mut builder = GreenNodeBuilder::new(); - builder.start_node(SyntaxKind(0)); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); + builder.start_node(SyntaxKind::Root); builder.finish_node(); let (green, _) = builder.finish(); let root: SyntaxNode = SyntaxNode::new_root(green); diff --git a/tests/it/sendsync.rs b/tests/it/sendsync.rs index 3989e56..a41a6b4 100644 --- a/tests/it/sendsync.rs +++ b/tests/it/sendsync.rs @@ -3,11 +3,11 @@ use crossbeam_utils::thread::scope; use std::{thread, time::Duration}; -use super::{build_recursive, Element, ResolvedNode, SyntaxNode}; +use super::{build_recursive, Element, ResolvedNode, SyntaxNode, TestLang}; use cstree::{interning::IntoResolver, GreenNodeBuilder}; fn build_tree(root: &Element<'_>) -> ResolvedNode { - let mut builder = GreenNodeBuilder::new(); + let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); build_recursive(root, &mut builder, 0); let (node, cache) = builder.finish(); SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver()) diff --git a/tests/it/serde.rs b/tests/it/serde.rs index 1ca9183..e7358f1 100644 --- a/tests/it/serde.rs +++ b/tests/it/serde.rs @@ -1,6 +1,6 @@ use crate::{build_recursive, build_tree_with_cache, ResolvedNode}; -use super::{Element, SyntaxNode}; +use super::{Element, SyntaxNode, TestLang}; use cstree::{ interning::{new_interner, IntoResolver}, GreenNodeBuilder, NodeCache, NodeOrToken, @@ -224,7 +224,7 @@ fn three_level_tree() -> Element<'static> { } fn build_tree(root: Element<'_>) -> ResolvedNode { - let mut builder = GreenNodeBuilder::new(); + let mut builder: GreenNodeBuilder = 
GreenNodeBuilder::new(); build_recursive(&root, &mut builder, 0); let (node, cache) = builder.finish(); SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver())