diff --git a/Cargo.lock b/Cargo.lock index 7e3ca67..7c4c2d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,9 +95,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "lasso" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3" +checksum = "4efb7b456e95cc1ae2de7b18b1e4d791467b46f0a3d02464e5a16ea502091640" dependencies = [ "hashbrown", ] diff --git a/Cargo.toml b/Cargo.toml index 07dfbfc..6920393 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/domenicquirl/cstree" readme = "README.md" [dependencies] -lasso = "0.4.1" +lasso = "0.5" text-size = "1.0.0" fxhash= "0.2.1" servo_arc = { path = "vendor/servo_arc" } diff --git a/examples/math.rs b/examples/math.rs index 47414a2..03b114b 100644 --- a/examples/math.rs +++ b/examples/math.rs @@ -13,7 +13,10 @@ //! - "+" Token(Add) //! - "4" Token(Number) -use cstree::{interning::Resolver, GreenNodeBuilder, NodeOrToken}; +use cstree::{ + interning::{IntoResolver, Resolver}, + GreenNodeBuilder, NodeOrToken, +}; use std::iter::Peekable; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] diff --git a/examples/s_expressions.rs b/examples/s_expressions.rs index f7475fd..e0b7642 100644 --- a/examples/s_expressions.rs +++ b/examples/s_expressions.rs @@ -59,7 +59,10 @@ impl cstree::Language for Lang { /// offsets and parent pointers. /// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need /// the Resolver to get the real text back from the interned representation. -use cstree::{interning::Resolver, GreenNode}; +use cstree::{ + interning::{IntoResolver, Resolver}, + GreenNode, +}; /// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains /// a stack of currently in-progress nodes. diff --git a/src/green.rs b/src/green.rs index ee352ac..6379106 100644 --- a/src/green.rs +++ b/src/green.rs @@ -4,6 +4,7 @@ mod builder; mod element; +mod interner; mod node; mod token; @@ -12,6 +13,7 @@ use self::element::{GreenElement, PackedGreenElement}; pub use self::{ builder::{Checkpoint, GreenNodeBuilder, NodeCache}, + interner::TokenInterner, node::{Children, GreenNode}, token::GreenToken, }; diff --git a/src/green/builder.rs b/src/green/builder.rs index d72a5f3..973c3d4 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -1,11 +1,10 @@ -use std::{convert::TryFrom, num::NonZeroUsize}; +use std::convert::TryFrom; -use fxhash::{FxBuildHasher, FxHashMap}; -use lasso::{Capacity, Rodeo, Spur}; +use fxhash::FxHashMap; use text_size::TextSize; use crate::{ - green::{GreenElement, GreenNode, GreenToken, SyntaxKind}, + green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind}, interning::Interner, NodeOrToken, }; @@ -21,13 +20,13 @@ const CHILDREN_CACHE_THRESHOLD: usize = 3; /// A `NodeCache` deduplicates identical tokens and small nodes during tree construction. /// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`]. #[derive(Debug)] -pub struct NodeCache<'i, I = Rodeo> { +pub struct NodeCache<'i, I = TokenInterner> { nodes: FxHashMap, tokens: FxHashMap, interner: MaybeOwned<'i, I>, } -impl NodeCache<'static, Rodeo> { +impl NodeCache<'static> { /// Constructs a new, empty cache. /// /// By default, this will also create a default interner to deduplicate source text (strings) across @@ -53,11 +52,7 @@ impl NodeCache<'static, Rodeo> { Self { nodes: FxHashMap::default(), tokens: FxHashMap::default(), - interner: MaybeOwned::Owned(Rodeo::with_capacity_and_hasher( - // capacity values suggested by author of `lasso` - Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }), - FxBuildHasher::default(), - )), + interner: MaybeOwned::Owned(TokenInterner::new()), } } } @@ -77,7 +72,7 @@ where /// # Examples /// ``` /// # use cstree::*; - /// # use lasso::Rodeo; + /// use lasso::Rodeo; /// # const ROOT: SyntaxKind = SyntaxKind(0); /// # const INT: SyntaxKind = SyntaxKind(1); /// # fn parse(b: &mut GreenNodeBuilder, s: &str) {} @@ -239,7 +234,7 @@ pub struct Checkpoint(usize); /// /// # Examples /// ``` -/// # use cstree::*; +/// # use cstree::{*, interning::IntoResolver}; /// # const ROOT: SyntaxKind = SyntaxKind(0); /// # const INT: SyntaxKind = SyntaxKind(1); /// let mut builder = GreenNodeBuilder::new(); @@ -254,13 +249,13 @@ pub struct Checkpoint(usize); /// assert_eq!(int.as_token().unwrap().text(&resolver), "42"); /// ``` #[derive(Debug)] -pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo> { +pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> { cache: MaybeOwned<'cache, NodeCache<'interner, I>>, parents: Vec<(SyntaxKind, usize)>, children: Vec, } -impl GreenNodeBuilder<'static, 'static, Rodeo> { +impl GreenNodeBuilder<'static, 'static> { /// Creates new builder with an empty [`NodeCache`]. pub fn new() -> Self { Self { diff --git a/src/green/interner.rs b/src/green/interner.rs new file mode 100644 index 0000000..90d38cb --- /dev/null +++ b/src/green/interner.rs @@ -0,0 +1,123 @@ +use std::num::NonZeroUsize; + +use fxhash::FxBuildHasher; +use lasso::{Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver, Rodeo, Spur}; + +/// The default [`Interner`] used to deduplicate green token strings. +pub struct TokenInterner { + rodeo: Rodeo, +} + +impl TokenInterner { + pub(super) fn new() -> Self { + Self { + rodeo: Rodeo::with_capacity_and_hasher( + // capacity values suggested by author of `lasso` + Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }), + FxBuildHasher::default(), + ), + } + } +} + +impl Resolver for TokenInterner { + #[inline] + fn resolve<'a>(&'a self, key: &Spur) -> &'a str { + self.rodeo.resolve(key) + } + + #[inline] + fn try_resolve<'a>(&'a self, key: &Spur) -> Option<&'a str> { + self.rodeo.try_resolve(key) + } + + #[inline] + unsafe fn resolve_unchecked<'a>(&'a self, key: &Spur) -> &'a str { + self.rodeo.resolve_unchecked(key) + } + + #[inline] + fn contains_key(&self, key: &Spur) -> bool { + self.rodeo.contains_key(key) + } + + #[inline] + fn len(&self) -> usize { + self.rodeo.len() + } +} + +impl Reader for TokenInterner { + #[inline] + fn get(&self, val: &str) -> Option { + self.rodeo.get(val) + } + + #[inline] + fn contains(&self, val: &str) -> bool { + self.rodeo.contains(val) + } +} + +impl IntoResolver for TokenInterner { + type Resolver = as IntoResolver>::Resolver; + + #[inline] + fn into_resolver(self) -> Self::Resolver + where + Self: 'static, + { + self.rodeo.into_resolver() + } + + #[inline] + fn into_resolver_boxed(self: Box) -> Self::Resolver + where + Self: 'static, + { + Rodeo::::into_resolver_boxed(Box::new(self.rodeo)) + } +} + +impl Interner for TokenInterner { + #[inline] + fn get_or_intern(&mut self, val: &str) -> Spur { + self.rodeo.get_or_intern(val) + } + + #[inline] + fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult { + self.rodeo.try_get_or_intern(val) + } + + #[inline] + fn get_or_intern_static(&mut self, val: &'static str) -> Spur { + self.rodeo.get_or_intern_static(val) + } + + #[inline] + fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult { + self.rodeo.try_get_or_intern_static(val) + } +} + +impl IntoReader for TokenInterner { + type Reader = as IntoReader>::Reader; + + #[inline] + fn into_reader(self) -> Self::Reader + where + Self: 'static, + { + self.rodeo.into_reader() + } + + fn into_reader_boxed(self: Box) -> Self::Reader + where + Self: 'static, + { + Rodeo::::into_reader_boxed(Box::new(self.rodeo)) + } +} + +impl IntoReaderAndResolver for TokenInterner {} diff --git a/src/lib.rs b/src/lib.rs index 91c24dd..914ae8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,7 +63,8 @@ mod utility_types; /// Types and Traits for efficient String storage and deduplication. pub mod interning { - pub use lasso::{Interner, Reader, Resolver}; + pub use crate::green::TokenInterner; + pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver}; } use std::fmt; diff --git a/src/serde_impls.rs b/src/serde_impls.rs index 654a691..a30373f 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -1,6 +1,9 @@ //! Serialization and Deserialization for syntax trees. -use crate::{interning::Resolver, GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent}; +use crate::{ + interning::{IntoResolver, Resolver}, + GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent, +}; use serde::{ de::{Error, SeqAccess, Visitor}, ser::SerializeTuple, diff --git a/src/syntax.rs b/src/syntax.rs index c6baaab..c7eb326 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -443,7 +443,7 @@ impl SyntaxNode { /// /// # Example /// ``` - /// # use cstree::*; + /// # use cstree::{*, interning::TokenInterner}; /// # #[allow(non_camel_case_types)] /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// #[repr(u16)] @@ -468,7 +468,7 @@ impl SyntaxNode { /// } /// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0); /// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1); - /// # type SyntaxNode = cstree::SyntaxNode>; + /// # type SyntaxNode = cstree::SyntaxNode; /// let mut builder = GreenNodeBuilder::new(); /// builder.start_node(ROOT); /// builder.token(TOKEN, "content"); diff --git a/src/syntax_text.rs b/src/syntax_text.rs index 62e9500..79bb2d7 100644 --- a/src/syntax_text.rs +++ b/src/syntax_text.rs @@ -13,7 +13,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T /// /// # Example /// ``` -/// # use cstree::*; +/// # use cstree::{*, interning::IntoResolver}; /// # #[allow(non_camel_case_types)] /// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// # #[repr(u16)] diff --git a/tests/sendsync.rs b/tests/sendsync.rs index 6886c3c..b347ca9 100644 --- a/tests/sendsync.rs +++ b/tests/sendsync.rs @@ -7,8 +7,10 @@ use crossbeam_utils::thread::scope; use std::{thread, time::Duration}; use common::{build_recursive, Element, SyntaxNode}; -use cstree::GreenNodeBuilder; -use lasso::Resolver; +use cstree::{ + interning::{IntoResolver, Resolver}, + GreenNodeBuilder, +}; fn build_tree(root: &Element<'_>) -> SyntaxNode { let mut builder = GreenNodeBuilder::new(); diff --git a/tests/serde.rs b/tests/serde.rs index 8b1bad5..5537e84 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -1,10 +1,13 @@ #![cfg(feature = "serde1")] +#[allow(unused)] mod common; use common::{Element, SyntaxNode}; -use cstree::{GreenNodeBuilder, NodeCache, NodeOrToken}; -use lasso::Resolver; +use cstree::{ + interning::{IntoResolver, Resolver}, + GreenNodeBuilder, NodeCache, NodeOrToken, +}; use serde_test::Token; use std::fmt;