1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

newtype default interner

This commit is contained in:
Domenic Quirl 2021-02-21 20:50:44 +01:00
parent d8ce241cf5
commit fb41635961
13 changed files with 164 additions and 29 deletions

4
Cargo.lock generated
View file

@ -95,9 +95,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]] [[package]]
name = "lasso" name = "lasso"
version = "0.4.1" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3" checksum = "4efb7b456e95cc1ae2de7b18b1e4d791467b46f0a3d02464e5a16ea502091640"
dependencies = [ dependencies = [
"hashbrown", "hashbrown",
] ]

View file

@ -9,7 +9,7 @@ repository = "https://github.com/domenicquirl/cstree"
readme = "README.md" readme = "README.md"
[dependencies] [dependencies]
lasso = "0.4.1" lasso = "0.5"
text-size = "1.0.0" text-size = "1.0.0"
fxhash= "0.2.1" fxhash= "0.2.1"
servo_arc = { path = "vendor/servo_arc" } servo_arc = { path = "vendor/servo_arc" }

View file

@ -13,7 +13,10 @@
//! - "+" Token(Add) //! - "+" Token(Add)
//! - "4" Token(Number) //! - "4" Token(Number)
use cstree::{interning::Resolver, GreenNodeBuilder, NodeOrToken}; use cstree::{
interning::{IntoResolver, Resolver},
GreenNodeBuilder, NodeOrToken,
};
use std::iter::Peekable; use std::iter::Peekable;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]

View file

@ -59,7 +59,10 @@ impl cstree::Language for Lang {
/// offsets and parent pointers. /// offsets and parent pointers.
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need /// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
/// the Resolver to get the real text back from the interned representation. /// the Resolver to get the real text back from the interned representation.
use cstree::{interning::Resolver, GreenNode}; use cstree::{
interning::{IntoResolver, Resolver},
GreenNode,
};
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains /// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
/// a stack of currently in-progress nodes. /// a stack of currently in-progress nodes.

View file

@ -4,6 +4,7 @@
mod builder; mod builder;
mod element; mod element;
mod interner;
mod node; mod node;
mod token; mod token;
@ -12,6 +13,7 @@ use self::element::{GreenElement, PackedGreenElement};
pub use self::{ pub use self::{
builder::{Checkpoint, GreenNodeBuilder, NodeCache}, builder::{Checkpoint, GreenNodeBuilder, NodeCache},
interner::TokenInterner,
node::{Children, GreenNode}, node::{Children, GreenNode},
token::GreenToken, token::GreenToken,
}; };

View file

@ -1,11 +1,10 @@
use std::{convert::TryFrom, num::NonZeroUsize}; use std::convert::TryFrom;
use fxhash::{FxBuildHasher, FxHashMap}; use fxhash::FxHashMap;
use lasso::{Capacity, Rodeo, Spur};
use text_size::TextSize; use text_size::TextSize;
use crate::{ use crate::{
green::{GreenElement, GreenNode, GreenToken, SyntaxKind}, green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
interning::Interner, interning::Interner,
NodeOrToken, NodeOrToken,
}; };
@ -21,13 +20,13 @@ const CHILDREN_CACHE_THRESHOLD: usize = 3;
/// A `NodeCache` deduplicates identical tokens and small nodes during tree construction. /// A `NodeCache` deduplicates identical tokens and small nodes during tree construction.
/// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`]. /// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`].
#[derive(Debug)] #[derive(Debug)]
pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> { pub struct NodeCache<'i, I = TokenInterner> {
nodes: FxHashMap<GreenNodeHead, GreenNode>, nodes: FxHashMap<GreenNodeHead, GreenNode>,
tokens: FxHashMap<GreenTokenData, GreenToken>, tokens: FxHashMap<GreenTokenData, GreenToken>,
interner: MaybeOwned<'i, I>, interner: MaybeOwned<'i, I>,
} }
impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> { impl NodeCache<'static> {
/// Constructs a new, empty cache. /// Constructs a new, empty cache.
/// ///
/// By default, this will also create a default interner to deduplicate source text (strings) across /// By default, this will also create a default interner to deduplicate source text (strings) across
@ -53,11 +52,7 @@ impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
Self { Self {
nodes: FxHashMap::default(), nodes: FxHashMap::default(),
tokens: FxHashMap::default(), tokens: FxHashMap::default(),
interner: MaybeOwned::Owned(Rodeo::with_capacity_and_hasher( interner: MaybeOwned::Owned(TokenInterner::new()),
// capacity values suggested by author of `lasso`
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
FxBuildHasher::default(),
)),
} }
} }
} }
@ -77,7 +72,7 @@ where
/// # Examples /// # Examples
/// ``` /// ```
/// # use cstree::*; /// # use cstree::*;
/// # use lasso::Rodeo; /// use lasso::Rodeo;
/// # const ROOT: SyntaxKind = SyntaxKind(0); /// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1); /// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {} /// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
@ -239,7 +234,7 @@ pub struct Checkpoint(usize);
/// ///
/// # Examples /// # Examples
/// ``` /// ```
/// # use cstree::*; /// # use cstree::{*, interning::IntoResolver};
/// # const ROOT: SyntaxKind = SyntaxKind(0); /// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1); /// # const INT: SyntaxKind = SyntaxKind(1);
/// let mut builder = GreenNodeBuilder::new(); /// let mut builder = GreenNodeBuilder::new();
@ -254,13 +249,13 @@ pub struct Checkpoint(usize);
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42"); /// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
/// ``` /// ```
#[derive(Debug)] #[derive(Debug)]
pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> { pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> {
cache: MaybeOwned<'cache, NodeCache<'interner, I>>, cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
parents: Vec<(SyntaxKind, usize)>, parents: Vec<(SyntaxKind, usize)>,
children: Vec<GreenElement>, children: Vec<GreenElement>,
} }
impl GreenNodeBuilder<'static, 'static, Rodeo<Spur, FxBuildHasher>> { impl GreenNodeBuilder<'static, 'static> {
/// Creates new builder with an empty [`NodeCache`]. /// Creates new builder with an empty [`NodeCache`].
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {

123
src/green/interner.rs Normal file
View file

@ -0,0 +1,123 @@
use std::num::NonZeroUsize;
use fxhash::FxBuildHasher;
use lasso::{Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver, Rodeo, Spur};
/// The default [`Interner`] used to deduplicate green token strings.
///
/// This is a newtype around a [`Rodeo`] keyed by [`Spur`] and hashed with [`FxBuildHasher`].
///
/// `Debug` is derived so that types which are `#[derive(Debug)]` and generic over their interner
/// (with this type as the default parameter) remain `Debug` when used with the default.
#[derive(Debug)]
pub struct TokenInterner {
    /// The wrapped `lasso` interner that actually stores the strings.
    rodeo: Rodeo<Spur, FxBuildHasher>,
}
impl TokenInterner {
    /// Creates an empty interner whose backing [`Rodeo`] is pre-sized and uses [`FxBuildHasher`].
    ///
    /// See `lasso`'s `Capacity::new` for the meaning of the two capacity values.
    pub(super) fn new() -> Self {
        // `NonZeroUsize::new` yields `None` only for zero, so this `expect` can never fire.
        // Using the checked constructor keeps this function free of `unsafe`.
        let byte_capacity = NonZeroUsize::new(4096).expect("4096 is non-zero");
        Self {
            rodeo: Rodeo::with_capacity_and_hasher(
                // capacity values suggested by author of `lasso`
                Capacity::new(512, byte_capacity),
                FxBuildHasher::default(),
            ),
        }
    }
}
impl Resolver for TokenInterner {
#[inline]
fn resolve<'a>(&'a self, key: &Spur) -> &'a str {
self.rodeo.resolve(key)
}
#[inline]
fn try_resolve<'a>(&'a self, key: &Spur) -> Option<&'a str> {
self.rodeo.try_resolve(key)
}
#[inline]
unsafe fn resolve_unchecked<'a>(&'a self, key: &Spur) -> &'a str {
self.rodeo.resolve_unchecked(key)
}
#[inline]
fn contains_key(&self, key: &Spur) -> bool {
self.rodeo.contains_key(key)
}
#[inline]
fn len(&self) -> usize {
self.rodeo.len()
}
}
impl Reader for TokenInterner {
#[inline]
fn get(&self, val: &str) -> Option<Spur> {
self.rodeo.get(val)
}
#[inline]
fn contains(&self, val: &str) -> bool {
self.rodeo.contains(val)
}
}
/// Conversion into the resolver type of the wrapped `Rodeo`.
impl IntoResolver for TokenInterner {
    /// The same resolver type the wrapped `Rodeo` converts into.
    type Resolver = <Rodeo<Spur, FxBuildHasher> as IntoResolver>::Resolver;

    /// Consumes the interner and converts the wrapped `Rodeo` into its resolver.
    #[inline]
    fn into_resolver(self) -> Self::Resolver
    where
        Self: 'static,
    {
        self.rodeo.into_resolver()
    }

    /// Boxed variant of [`into_resolver`](Self::into_resolver).
    #[inline]
    fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
    where
        Self: 'static,
    {
        // The `Rodeo` has to be moved out of the box regardless, so convert it directly
        // instead of allocating a fresh `Box` only for the callee to unbox it again.
        let rodeo = self.rodeo;
        rodeo.into_resolver()
    }
}
impl Interner for TokenInterner {
#[inline]
fn get_or_intern(&mut self, val: &str) -> Spur {
self.rodeo.get_or_intern(val)
}
#[inline]
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Spur> {
self.rodeo.try_get_or_intern(val)
}
#[inline]
fn get_or_intern_static(&mut self, val: &'static str) -> Spur {
self.rodeo.get_or_intern_static(val)
}
#[inline]
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Spur> {
self.rodeo.try_get_or_intern_static(val)
}
}
/// Conversion into the reader type of the wrapped `Rodeo`.
impl IntoReader for TokenInterner {
    /// The same reader type the wrapped `Rodeo` converts into.
    type Reader = <Rodeo<Spur, FxBuildHasher> as IntoReader>::Reader;

    /// Consumes the interner and converts the wrapped `Rodeo` into its reader.
    #[inline]
    fn into_reader(self) -> Self::Reader
    where
        Self: 'static,
    {
        self.rodeo.into_reader()
    }

    /// Boxed variant of [`into_reader`](Self::into_reader).
    // `#[inline]` matches every other forwarding method on this type.
    #[inline]
    fn into_reader_boxed(self: Box<Self>) -> Self::Reader
    where
        Self: 'static,
    {
        // The `Rodeo` has to be moved out of the box regardless, so convert it directly
        // instead of allocating a fresh `Box` only for the callee to unbox it again.
        let rodeo = self.rodeo;
        rodeo.into_reader()
    }
}
// Empty impl: no methods need to be provided here. Presumably `IntoReaderAndResolver` is a marker
// trait combining `IntoReader` and `IntoResolver` (both implemented for this type) — confirm
// against the `lasso` documentation.
impl IntoReaderAndResolver for TokenInterner {}

View file

@ -63,7 +63,8 @@ mod utility_types;
/// Types and Traits for efficient String storage and deduplication. /// Types and Traits for efficient String storage and deduplication.
pub mod interning { pub mod interning {
pub use lasso::{Interner, Reader, Resolver}; pub use crate::green::TokenInterner;
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
} }
use std::fmt; use std::fmt;

View file

@ -1,6 +1,9 @@
//! Serialization and Deserialization for syntax trees. //! Serialization and Deserialization for syntax trees.
use crate::{interning::Resolver, GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent}; use crate::{
interning::{IntoResolver, Resolver},
GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent,
};
use serde::{ use serde::{
de::{Error, SeqAccess, Visitor}, de::{Error, SeqAccess, Visitor},
ser::SerializeTuple, ser::SerializeTuple,

View file

@ -443,7 +443,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
/// ///
/// # Example /// # Example
/// ``` /// ```
/// # use cstree::*; /// # use cstree::{*, interning::TokenInterner};
/// # #[allow(non_camel_case_types)] /// # #[allow(non_camel_case_types)]
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// #[repr(u16)] /// #[repr(u16)]
@ -468,7 +468,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
/// } /// }
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0); /// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
/// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1); /// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1);
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), lasso::Rodeo<lasso::Spur, fxhash::FxBuildHasher>>; /// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), TokenInterner>;
/// let mut builder = GreenNodeBuilder::new(); /// let mut builder = GreenNodeBuilder::new();
/// builder.start_node(ROOT); /// builder.start_node(ROOT);
/// builder.token(TOKEN, "content"); /// builder.token(TOKEN, "content");

View file

@ -13,7 +13,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
/// ///
/// # Example /// # Example
/// ``` /// ```
/// # use cstree::*; /// # use cstree::{*, interning::IntoResolver};
/// # #[allow(non_camel_case_types)] /// # #[allow(non_camel_case_types)]
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[repr(u16)] /// # #[repr(u16)]

View file

@ -7,8 +7,10 @@ use crossbeam_utils::thread::scope;
use std::{thread, time::Duration}; use std::{thread, time::Duration};
use common::{build_recursive, Element, SyntaxNode}; use common::{build_recursive, Element, SyntaxNode};
use cstree::GreenNodeBuilder; use cstree::{
use lasso::Resolver; interning::{IntoResolver, Resolver},
GreenNodeBuilder,
};
fn build_tree<D>(root: &Element<'_>) -> SyntaxNode<D, impl Resolver> { fn build_tree<D>(root: &Element<'_>) -> SyntaxNode<D, impl Resolver> {
let mut builder = GreenNodeBuilder::new(); let mut builder = GreenNodeBuilder::new();

View file

@ -1,10 +1,13 @@
#![cfg(feature = "serde1")] #![cfg(feature = "serde1")]
#[allow(unused)]
mod common; mod common;
use common::{Element, SyntaxNode}; use common::{Element, SyntaxNode};
use cstree::{GreenNodeBuilder, NodeCache, NodeOrToken}; use cstree::{
use lasso::Resolver; interning::{IntoResolver, Resolver},
GreenNodeBuilder, NodeCache, NodeOrToken,
};
use serde_test::Token; use serde_test::Token;
use std::fmt; use std::fmt;