diff --git a/benches/main.rs b/benches/main.rs index 18e1560..95b7796 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -28,8 +28,8 @@ where { let mut builder = GreenNodeBuilder::with_cache(cache); build_recursive(root, &mut builder, 0); - let (node, interner) = builder.finish(); - assert!(interner.is_none()); + let (node, cache) = builder.finish(); + assert!(cache.is_none()); node } diff --git a/examples/math.rs b/examples/math.rs index 200359d..6ed2370 100644 --- a/examples/math.rs +++ b/examples/math.rs @@ -117,8 +117,11 @@ impl<'input, I: Iterator> Parser<'input, I> { self.parse_add(); self.builder.finish_node(); - let (tree, resolver) = self.builder.finish(); - (SyntaxNode::new_root(tree), resolver.unwrap().into_resolver()) + let (tree, cache) = self.builder.finish(); + ( + SyntaxNode::new_root(tree), + cache.unwrap().into_interner().unwrap().into_resolver(), + ) } } diff --git a/examples/s_expressions.rs b/examples/s_expressions.rs index 82ca82f..6883547 100644 --- a/examples/s_expressions.rs +++ b/examples/s_expressions.rs @@ -124,10 +124,10 @@ fn parse(text: &str) -> Parse { // Get the green tree from the builder. // Note that, since we didn't provide our own interner to the builder, it has // instantiated one for us and now returns it together with the tree. 
- let (tree, interner) = self.builder.finish(); + let (tree, cache) = self.builder.finish(); Parse { green_node: tree, - resolver: interner.unwrap().into_resolver(), + resolver: cache.unwrap().into_interner().unwrap().into_resolver(), errors: self.errors, } } diff --git a/src/green/builder.rs b/src/green/builder.rs index 167ea3e..33d212a 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -90,7 +90,9 @@ where /// assert_eq!(tree.kind(), ROOT); /// let int = tree.children().next().unwrap(); /// assert_eq!(int.kind(), INT); + /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); /// ``` + #[inline] pub fn with_interner(interner: &'i mut I) -> Self { Self { nodes: FxHashMap::default(), @@ -99,6 +101,47 @@ where } } + /// Constructs a new, empty cache that will use the given interner to deduplicate source text + /// (strings) across tokens. + /// # Examples + /// ``` + /// # use cstree::*; + /// use lasso::Rodeo; + /// # const ROOT: SyntaxKind = SyntaxKind(0); + /// # const INT: SyntaxKind = SyntaxKind(1); + /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// let mut interner = Rodeo::new(); + /// let cache = NodeCache::from_interner(interner); + /// let mut builder = GreenNodeBuilder::from_cache(cache); + /// # builder.start_node(ROOT); + /// # builder.token(INT, "42"); + /// # builder.finish_node(); + /// parse(&mut builder, "42"); + /// let (tree, cache) = builder.finish(); + /// let interner = cache.unwrap().into_interner().unwrap(); + /// assert_eq!(tree.kind(), ROOT); + /// let int = tree.children().next().unwrap(); + /// assert_eq!(int.kind(), INT); + /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); + /// ``` + #[inline] + pub fn from_interner(interner: I) -> Self { + Self { + nodes: FxHashMap::default(), + tokens: FxHashMap::default(), + interner: MaybeOwned::Owned(interner), + } + } + + /// If this node cache was constructed with [`new`](NodeCache::new) or + /// [`from_interner`](NodeCache::from_interner), returns the 
interner used to deduplicate source + /// text (strings) to allow resolving tree tokens back to text and re-using the interner to build + /// additional trees. + #[inline] + pub fn into_interner(self) -> Option { + self.interner.into_owned() + } + fn node(&mut self, kind: SyntaxKind, children: &[GreenElement]) -> GreenNode { let mut hasher = FxHasher32::default(); let mut text_len: TextSize = 0.into(); @@ -200,10 +243,11 @@ impl Default for MaybeOwned<'_, T> { pub struct Checkpoint(usize); /// A builder for green trees. -/// Construct with [`new`](GreenNodeBuilder::new) or [`with_cache`](GreenNodeBuilder::with_cache). To -/// add tree nodes, start them with [`start_node`](GreenNodeBuilder::start_node), add -/// [`token`](GreenNodeBuilder::token)s and then [`finish_node`](GreenNodeBuilder::finish_node). When -/// the whole tree is constructed, call [`finish`](GreenNodeBuilder::finish) to obtain the root. +/// Construct with [`new`](GreenNodeBuilder::new), [`with_cache`](GreenNodeBuilder::with_cache), or +/// [`from_cache`](GreenNodeBuilder::from_cache). To add tree nodes, start them with +/// [`start_node`](GreenNodeBuilder::start_node), add [`token`](GreenNodeBuilder::token)s and then +/// [`finish_node`](GreenNodeBuilder::finish_node). When the whole tree is constructed, call +/// [`finish`](GreenNodeBuilder::finish) to obtain the root.
/// /// # Examples /// ``` @@ -214,11 +258,11 @@ pub struct Checkpoint(usize); /// builder.start_node(ROOT); /// builder.token(INT, "42"); /// builder.finish_node(); -/// let (tree, interner) = builder.finish(); +/// let (tree, cache) = builder.finish(); /// assert_eq!(tree.kind(), ROOT); /// let int = tree.children().next().unwrap(); /// assert_eq!(int.kind(), INT); -/// let resolver = interner.unwrap().into_resolver(); +/// let resolver = cache.unwrap().into_interner().unwrap().into_resolver(); /// assert_eq!(int.as_token().unwrap().text(&resolver), "42"); /// ``` #[derive(Debug)] @@ -259,6 +303,36 @@ where } } + /// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows to structurally + /// share underlying trees. + /// The `cache` given will be returned on [`finish`](GreenNodeBuilder::finish). + /// # Examples + /// ``` + /// # use cstree::*; + /// # const ROOT: SyntaxKind = SyntaxKind(0); + /// # const INT: SyntaxKind = SyntaxKind(1); + /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} + /// let cache = NodeCache::new(); + /// let mut builder = GreenNodeBuilder::from_cache(cache); + /// # builder.start_node(ROOT); + /// # builder.token(INT, "42"); + /// # builder.finish_node(); + /// parse(&mut builder, "42"); + /// let (tree, cache) = builder.finish(); + /// let interner = cache.unwrap().into_interner().unwrap(); + /// assert_eq!(tree.kind(), ROOT); + /// let int = tree.children().next().unwrap(); + /// assert_eq!(int.kind(), INT); + /// assert_eq!(int.as_token().unwrap().text(&interner), "42"); + /// ``` + pub fn from_cache(cache: NodeCache<'interner, I>) -> Self { + Self { + cache: MaybeOwned::Owned(cache), + parents: Vec::with_capacity(8), + children: Vec::with_capacity(8), + } + } + /// Add new token to the current branch. #[inline] pub fn token(&mut self, kind: SyntaxKind, text: &str) { @@ -340,15 +414,16 @@ where /// [`finish_node`](GreenNodeBuilder::finish_node) are balanced, i.e. 
that every started node has /// been completed! /// - /// If this builder was constructed with [`new`](GreenNodeBuilder::new), this method returns the - /// interner used to deduplicate source text (strings) as its second return value to allow - /// resolving tree tokens back to text and re-using the interner to build additonal trees. + /// If this builder was constructed with [`new`](GreenNodeBuilder::new) or + /// [`from_cache`](GreenNodeBuilder::from_cache), this method returns the cache used to deduplicate tree nodes + /// and source text (strings) as its second return value to allow re-using the cache or extracting the underlying string + /// [`Interner`]. See also [`NodeCache::into_interner`]. #[inline] - pub fn finish(mut self) -> (GreenNode, Option) { + pub fn finish(mut self) -> (GreenNode, Option>) { assert_eq!(self.children.len(), 1); - let resolver = self.cache.into_owned().and_then(|cache| cache.interner.into_owned()); + let cache = self.cache.into_owned(); match self.children.pop().unwrap() { - NodeOrToken::Node(node) => (node, resolver), + NodeOrToken::Node(node) => (node, cache), NodeOrToken::Token(_) => panic!("called `finish` on a `GreenNodeBuilder` which only contained a token"), } } diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 93b5441..31413ce 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -191,8 +191,9 @@ where } } - let (tree, resolver) = builder.finish(); - let tree = ResolvedNode::new_root_with_resolver(tree, resolver.unwrap().into_resolver()); + let (tree, cache) = builder.finish(); + let tree = + ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap().into_resolver()); Ok((tree, data_indices)) } } diff --git a/src/syntax/node.rs b/src/syntax/node.rs index c1346ca..55691d3 100644 --- a/src/syntax/node.rs +++ b/src/syntax/node.rs @@ -366,8 +366,8 @@ impl SyntaxNode { /// builder.start_node(ROOT); /// builder.token(TOKEN, "content"); /// builder.finish_node(); - /// let (green, resolver) =
builder.finish(); - /// let root: ResolvedNode = SyntaxNode::new_root_with_resolver(green, resolver.unwrap()); + /// let (green, cache) = builder.finish(); + /// let root: ResolvedNode = SyntaxNode::new_root_with_resolver(green, cache.unwrap().into_interner().unwrap()); /// assert_eq!(root.text(), "content"); /// ``` #[inline] diff --git a/src/syntax/text.rs b/src/syntax/text.rs index 41f7ddf..a51437a 100644 --- a/src/syntax/text.rs +++ b/src/syntax/text.rs @@ -44,8 +44,8 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T /// # builder.start_node(LITERAL); /// # builder.token(LITERAL, s); /// # builder.finish_node(); -/// # let (root, interner) = builder.finish(); -/// # let resolver = interner.unwrap().into_resolver(); +/// # let (root, cache) = builder.finish(); +/// # let resolver = cache.unwrap().into_interner().unwrap().into_resolver(); /// # SyntaxNode::new_root_with_resolver(root, resolver) /// # } /// let node = parse_float_literal("2.748E2"); @@ -407,8 +407,8 @@ mod tests { builder.token(SyntaxKind(92), chunk); } builder.finish_node(); - let (node, interner) = builder.finish(); - (SyntaxNode::new_root(node), interner.unwrap()) + let (node, cache) = builder.finish(); + (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) } #[test] diff --git a/tests/it/basic.rs b/tests/it/basic.rs index aae31c9..c8102fe 100644 --- a/tests/it/basic.rs +++ b/tests/it/basic.rs @@ -5,8 +5,8 @@ use lasso::{Resolver, Rodeo}; fn build_tree(root: &Element<'_>) -> (SyntaxNode, impl Resolver) { let mut builder = GreenNodeBuilder::new(); build_recursive(root, &mut builder, 0); - let (node, interner) = builder.finish(); - (SyntaxNode::new_root(node), interner.unwrap()) + let (node, cache) = builder.finish(); + (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) } fn two_level_tree() -> Element<'static> { diff --git a/tests/it/main.rs b/tests/it/main.rs index a1780f7..ac5df88 100644 --- a/tests/it/main.rs +++ 
b/tests/it/main.rs @@ -43,8 +43,8 @@ where { let mut builder = GreenNodeBuilder::with_cache(cache); build_recursive(root, &mut builder, 0); - let (node, interner) = builder.finish(); - assert!(interner.is_none()); + let (node, cache) = builder.finish(); + assert!(cache.is_none()); node } diff --git a/tests/it/sendsync.rs b/tests/it/sendsync.rs index 53013e1..3989e56 100644 --- a/tests/it/sendsync.rs +++ b/tests/it/sendsync.rs @@ -9,8 +9,8 @@ use cstree::{interning::IntoResolver, GreenNodeBuilder}; fn build_tree(root: &Element<'_>) -> ResolvedNode { let mut builder = GreenNodeBuilder::new(); build_recursive(root, &mut builder, 0); - let (node, interner) = builder.finish(); - SyntaxNode::new_root_with_resolver(node, interner.unwrap().into_resolver()) + let (node, cache) = builder.finish(); + SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver()) } fn two_level_tree() -> Element<'static> { diff --git a/tests/it/serde.rs b/tests/it/serde.rs index 5ce135e..6ea5c71 100644 --- a/tests/it/serde.rs +++ b/tests/it/serde.rs @@ -225,8 +225,8 @@ fn three_level_tree() -> Element<'static> { fn build_tree(root: Element<'_>) -> ResolvedNode { let mut builder = GreenNodeBuilder::new(); build_recursive(&root, &mut builder, 0); - let (node, interner) = builder.finish(); - SyntaxNode::new_root_with_resolver(node, interner.unwrap().into_resolver()) + let (node, cache) = builder.finish(); + SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap().into_resolver()) } fn attach_data(node: &SyntaxNode) {