1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

refactor re-exports of lasso interning components and expose interned string keys

This commit is contained in:
Domenic Quirl 2021-09-17 16:37:15 +02:00
parent e7ab5ad987
commit 2aaf4169da
8 changed files with 184 additions and 37 deletions

View file

@ -15,7 +15,7 @@ readme = "README.md"
debug = true debug = true
[dependencies] [dependencies]
lasso = { version = "0.6", features = ["inline-more"] } lasso = { version = "0.6", features = ["inline-more", "multi-threaded"] }
text-size = "1.1.0" text-size = "1.1.0"
fxhash = "0.2.1" fxhash = "0.2.1"
parking_lot = "0.11.2" parking_lot = "0.11.2"
@ -41,8 +41,8 @@ name = "main"
harness = false harness = false
[features] [features]
default = [] default = []
serde1 = ["serde"] serialize = ["serde", "lasso/serialize"]
[package.metadata.docs.rs] [package.metadata.docs.rs]
features = ["serde1"] features = ["serialize"]

View file

@ -356,6 +356,26 @@ where
} }
} }
/// Convenience constructor for a builder that works on top of a borrowed, already existing interner.
///
/// Calling this has the same effect as creating a node cache via [`NodeCache::with_interner`]
/// and passing it to [`from_cache`](GreenNodeBuilder::from_cache).
#[inline]
pub fn with_interner(interner: &'interner mut I) -> Self {
    Self::from_cache(NodeCache::with_interner(interner))
}
/// Convenience constructor for a builder that takes an existing interner by value.
///
/// Calling this has the same effect as creating a node cache via [`NodeCache::from_interner`]
/// and passing it to [`from_cache`](GreenNodeBuilder::from_cache).
#[inline]
pub fn from_interner(interner: I) -> Self {
    Self::from_cache(NodeCache::from_interner(interner))
}
/// Get a reference to the interner used to deduplicate source text (strings). /// Get a reference to the interner used to deduplicate source text (strings).
/// ///
/// This is the same interner as used by the underlying [`NodeCache`]. /// This is the same interner as used by the underlying [`NodeCache`].

View file

@ -1,12 +1,14 @@
use std::num::NonZeroUsize; use std::num::NonZeroUsize;
use crate::interning::{
Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Key, Reader, Resolver, Rodeo,
};
use fxhash::FxBuildHasher; use fxhash::FxBuildHasher;
use lasso::{Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver, Rodeo, Spur};
/// The default [`Interner`] used to deduplicate green token strings. /// The default [`Interner`] used to deduplicate green token strings.
#[derive(Debug)] #[derive(Debug)]
pub struct TokenInterner { pub struct TokenInterner {
rodeo: Rodeo<Spur, FxBuildHasher>, rodeo: Rodeo,
} }
impl TokenInterner { impl TokenInterner {
@ -23,22 +25,22 @@ impl TokenInterner {
impl Resolver for TokenInterner { impl Resolver for TokenInterner {
#[inline] #[inline]
fn resolve<'a>(&'a self, key: &Spur) -> &'a str { fn resolve<'a>(&'a self, key: &Key) -> &'a str {
self.rodeo.resolve(key) self.rodeo.resolve(key)
} }
#[inline] #[inline]
fn try_resolve<'a>(&'a self, key: &Spur) -> Option<&'a str> { fn try_resolve<'a>(&'a self, key: &Key) -> Option<&'a str> {
self.rodeo.try_resolve(key) self.rodeo.try_resolve(key)
} }
#[inline] #[inline]
unsafe fn resolve_unchecked<'a>(&'a self, key: &Spur) -> &'a str { unsafe fn resolve_unchecked<'a>(&'a self, key: &Key) -> &'a str {
self.rodeo.resolve_unchecked(key) self.rodeo.resolve_unchecked(key)
} }
#[inline] #[inline]
fn contains_key(&self, key: &Spur) -> bool { fn contains_key(&self, key: &Key) -> bool {
self.rodeo.contains_key(key) self.rodeo.contains_key(key)
} }
@ -50,7 +52,7 @@ impl Resolver for TokenInterner {
impl Reader for TokenInterner { impl Reader for TokenInterner {
#[inline] #[inline]
fn get(&self, val: &str) -> Option<Spur> { fn get(&self, val: &str) -> Option<Key> {
self.rodeo.get(val) self.rodeo.get(val)
} }
@ -61,7 +63,7 @@ impl Reader for TokenInterner {
} }
impl IntoResolver for TokenInterner { impl IntoResolver for TokenInterner {
type Resolver = <Rodeo<Spur, FxBuildHasher> as IntoResolver>::Resolver; type Resolver = <Rodeo as IntoResolver>::Resolver;
#[inline] #[inline]
fn into_resolver(self) -> Self::Resolver fn into_resolver(self) -> Self::Resolver
@ -76,34 +78,34 @@ impl IntoResolver for TokenInterner {
where where
Self: 'static, Self: 'static,
{ {
Rodeo::<Spur, FxBuildHasher>::into_resolver_boxed(Box::new(self.rodeo)) Rodeo::into_resolver_boxed(Box::new(self.rodeo))
} }
} }
impl Interner for TokenInterner { impl Interner for TokenInterner {
#[inline] #[inline]
fn get_or_intern(&mut self, val: &str) -> Spur { fn get_or_intern(&mut self, val: &str) -> Key {
self.rodeo.get_or_intern(val) self.rodeo.get_or_intern(val)
} }
#[inline] #[inline]
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Spur> { fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Key> {
self.rodeo.try_get_or_intern(val) self.rodeo.try_get_or_intern(val)
} }
#[inline] #[inline]
fn get_or_intern_static(&mut self, val: &'static str) -> Spur { fn get_or_intern_static(&mut self, val: &'static str) -> Key {
self.rodeo.get_or_intern_static(val) self.rodeo.get_or_intern_static(val)
} }
#[inline] #[inline]
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Spur> { fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Key> {
self.rodeo.try_get_or_intern_static(val) self.rodeo.try_get_or_intern_static(val)
} }
} }
impl IntoReader for TokenInterner { impl IntoReader for TokenInterner {
type Reader = <Rodeo<Spur, FxBuildHasher> as IntoReader>::Reader; type Reader = <Rodeo as IntoReader>::Reader;
#[inline] #[inline]
fn into_reader(self) -> Self::Reader fn into_reader(self) -> Self::Reader
@ -117,7 +119,7 @@ impl IntoReader for TokenInterner {
where where
Self: 'static, Self: 'static,
{ {
Rodeo::<Spur, FxBuildHasher>::into_reader_boxed(Box::new(self.rodeo)) Rodeo::into_reader_boxed(Box::new(self.rodeo))
} }
} }

View file

@ -1,14 +1,17 @@
use std::{fmt, hash, mem::ManuallyDrop, ptr}; use std::{fmt, hash, mem::ManuallyDrop, ptr};
use crate::{green::SyntaxKind, interning::Resolver, TextSize}; use crate::{
use lasso::Spur; green::SyntaxKind,
interning::{Key, Resolver},
TextSize,
};
use triomphe::Arc; use triomphe::Arc;
#[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation #[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub(super) struct GreenTokenData { pub(super) struct GreenTokenData {
pub(super) kind: SyntaxKind, pub(super) kind: SyntaxKind,
pub(super) text: Spur, pub(super) text: Key,
pub(super) text_len: TextSize, pub(super) text_len: TextSize,
} }
@ -70,8 +73,12 @@ impl GreenToken {
self.data().text_len self.data().text_len
} }
/// Returns the interned key of text covered by this token.
/// This key may be used for comparisons with other keys of strings interned by the same interner.
///
/// See also [`text`](GreenToken::text).
#[inline] #[inline]
pub(crate) fn text_key(&self) -> Spur { pub fn text_key(&self) -> Key {
self.data().text self.data().text
} }
} }

47
src/interning.rs Normal file
View file

@ -0,0 +1,47 @@
//! Types and Traits for efficient String storage and deduplication.
//!
//! Interning functionality is provided by the [`lasso`](lasso) crate.
pub use fxhash::FxBuildHasher as Hasher;
pub use crate::green::TokenInterner;
/// The index type for all interners. Each key represents a single interned string and can be
/// turned back into that string by the interner that created the key (or by a [`Resolver`]
/// obtained from that interner).
pub type Key = lasso::Spur;
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique [`Key`] to
/// re-access each string in `O(1)` time. By default, `Rodeo` uses an [`fxhash`] [`Hasher`].
pub type Rodeo<S = Hasher> = lasso::Rodeo<Key, S>;
/// Creates a fresh, single-threaded [`Rodeo`] that hashes with the default [`Hasher`].
///
/// For an interner that can be used from multiple threads, see [`new_threaded_interner`].
#[inline]
pub fn new_interner() -> Rodeo {
    let hasher = Hasher::default();
    Rodeo::with_hasher(hasher)
}
/// A string interner that caches strings quickly with a minimal memory footprint, returning a unique [`Key`] to
/// re-access each string in `O(1)` time. By default, `ThreadedRodeo` uses an [`fxhash`] [`Hasher`].
///
/// This is the interner type returned by [`new_threaded_interner`] for use across multiple threads.
pub type ThreadedRodeo<S = Hasher> = lasso::ThreadedRodeo<Key, S>;
/// Creates a fresh [`ThreadedRodeo`] that hashes with the default [`Hasher`] and can be used across
/// multiple threads.
#[inline]
pub fn new_threaded_interner() -> ThreadedRodeo {
    let hasher = Hasher::default();
    ThreadedRodeo::with_hasher(hasher)
}
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings, both
/// key to string resolution and string to key lookups.
///
/// The hasher is the same as the Rodeo or ThreadedRodeo that created it.
/// Can be acquired with the `into_reader` methods (see also [`IntoReader`]).
///
/// Keys are of type [`Key`]; if no hasher type is given, the alias defaults to [`Hasher`].
pub type RodeoReader<S = Hasher> = lasso::RodeoReader<Key, S>;
/// A read-only view of a [`Rodeo`] or [`ThreadedRodeo`] that allows contention-free access to interned strings with
/// only key to string resolution.
///
/// Can be acquired with the `into_resolver` methods (see also [`IntoResolver`]).
///
/// Keys are of type [`Key`]. Unlike [`RodeoReader`], this alias takes no hasher parameter.
pub type RodeoResolver = lasso::RodeoResolver<Key>;
pub use lasso::{Capacity, Iter, LassoError, LassoErrorKind, LassoResult, MemoryLimits, Strings};

View file

@ -49,26 +49,23 @@
#[allow(unsafe_code)] #[allow(unsafe_code)]
mod green; mod green;
#[allow(unsafe_code)] #[allow(unsafe_code)]
pub mod syntax; mod syntax;
// Serialization support must be gated on the `serialize` feature: this change renames `serde1` to `serialize` in Cargo.toml.
#[cfg(feature = "serde1")] #[cfg(feature = "serialize")]
mod serde_impls; mod serde_impls;
#[allow(missing_docs)] #[allow(missing_docs)]
mod utility_types; mod utility_types;
/// Types and Traits for efficient String storage and deduplication. pub mod interning;
pub mod interning {
pub use crate::green::TokenInterner;
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
}
use std::fmt; use std::fmt;
// Reexport types for working with strings. // Reexport types for working with strings.
pub use text_size::{TextLen, TextRange, TextSize}; pub use text_size::{TextLen, TextRange, TextSize};
#[doc(inline)]
pub use crate::syntax::*;
pub use crate::{ pub use crate::{
green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, NodeCache, SyntaxKind}, green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, NodeCache, SyntaxKind},
syntax::*,
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent}, utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
}; };
pub use triomphe::Arc; pub use triomphe::Arc;

View file

@ -9,7 +9,7 @@ use lasso::Resolver;
use text_size::{TextRange, TextSize}; use text_size::{TextRange, TextSize};
use super::*; use super::*;
use crate::{Direction, GreenNode, GreenToken, Language, SyntaxKind}; use crate::{interning::Key, Direction, GreenNode, GreenToken, Language, SyntaxKind};
/// Syntax tree token. /// Syntax tree token.
#[derive(Debug)] #[derive(Debug)]
@ -69,6 +69,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// Returns this token's [`Debug`](fmt::Debug) representation as a string. /// Returns this token's [`Debug`](fmt::Debug) representation as a string.
/// ///
/// To avoid allocating for every token, see [`write_debug`](SyntaxToken::write_debug). /// To avoid allocating for every token, see [`write_debug`](SyntaxToken::write_debug).
#[inline]
pub fn debug<R>(&self, resolver: &R) -> String pub fn debug<R>(&self, resolver: &R) -> String
where where
R: Resolver + ?Sized, R: Resolver + ?Sized,
@ -182,16 +183,86 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// This method is different from the `PartialEq` and `Eq` implementations in that it compares /// This method is different from the `PartialEq` and `Eq` implementations in that it compares
/// the text and not the token position. /// the text and not the token position.
/// It is more efficient than comparing the result of /// It is more efficient than comparing the result of
/// [`resolve_text`](SyntaxToken::resolve_text) because it compares the tokens' interned string /// [`resolve_text`](SyntaxToken::resolve_text) because it compares the tokens' interned
/// keys. /// [`text_key`s](SyntaxToken::text_key).
/// Therefore, it also does not require a [`Resolver`]. /// Therefore, it also does not require a [`Resolver`].
/// **Note** that the result of the comparison may be wrong when comparing two tokens from /// **Note** that the result of the comparison may be wrong when comparing two tokens from
/// different trees that use different interners. /// different trees that use different interners.
#[inline]
pub fn text_eq(&self, other: &Self) -> bool { pub fn text_eq(&self, other: &Self) -> bool {
self.green().text_key() == other.green().text_key() self.text_key() == other.text_key()
}
/// Returns the interned key of text covered by this token.
/// This key may be used for comparisons with other keys of strings interned by the same interner.
///
/// See also [`resolve_text`](SyntaxToken::resolve_text) and [`text_eq`](SyntaxToken::text_eq).
///
/// # Examples
/// If you intern strings inside of your application, e.g. inside of a compiler, you can use
/// token's text keys to cross-reference between the syntax tree and the rest of your
/// implementation by re-using the interner in both.
/// ```
/// # use cstree::*;
/// # use cstree::interning::{Hasher, Rodeo, Key, new_interner};
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[repr(u16)]
/// # enum SyntaxKind {
/// # ROOT,
/// # INT,
/// # }
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # enum Lang {}
/// # impl cstree::Language for Lang {
/// # type Kind = SyntaxKind;
/// #
/// # fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
/// # assert!(raw.0 <= SyntaxKind::INT as u16);
/// # unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// # }
/// #
/// # fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
/// # cstree::SyntaxKind(kind as u16)
/// # }
/// # }
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, ()>;
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
/// # const IDENT: cstree::SyntaxKind = cstree::SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
/// #
/// struct TypeTable {
/// // ...
/// }
/// impl TypeTable {
/// fn type_of(&self, ident: Key) -> &str {
/// // ...
/// # ""
/// }
/// }
/// # struct State {
/// # interner: Rodeo,
/// # type_table: TypeTable,
/// # }
/// # let interner = new_interner();
/// # let state = &mut State { interner, type_table: TypeTable{} };
/// let mut builder = GreenNodeBuilder::with_interner(&mut state.interner);
/// # let input = "";
/// # builder.start_node(ROOT);
/// # builder.token(IDENT, "x");
/// # builder.finish_node();
/// let tree = parse(&mut builder, "x");
/// # let tree = SyntaxNode::<Lang>::new_root(builder.finish().0);
/// let type_table = &state.type_table;
/// let ident = tree.children_with_tokens().next().unwrap().into_token().unwrap();
/// let typ = type_table.type_of(ident.text_key());
/// ```
#[inline]
pub fn text_key(&self) -> Key {
self.green().text_key()
} }
/// Returns the underlying green tree token of this token. /// Returns the underlying green tree token of this token.
#[inline]
pub fn green(&self) -> &GreenToken { pub fn green(&self) -> &GreenToken {
self.parent self.parent
.green() .green()
@ -242,6 +313,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// Returns the next token in the tree. /// Returns the next token in the tree.
/// This is not necessarily a direct sibling of this token, but will always be further right in the tree. /// This is not necessarily a direct sibling of this token, but will always be further right in the tree.
#[inline]
pub fn next_token(&self) -> Option<&SyntaxToken<L, D>> { pub fn next_token(&self) -> Option<&SyntaxToken<L, D>> {
match self.next_sibling_or_token() { match self.next_sibling_or_token() {
Some(element) => element.first_token(), Some(element) => element.first_token(),
@ -255,6 +327,7 @@ impl<L: Language, D> SyntaxToken<L, D> {
/// Returns the previous token in the tree. /// Returns the previous token in the tree.
/// This is not necessarily a direct sibling of this token, but will always be further left in the tree. /// This is not necessarily a direct sibling of this token, but will always be further left in the tree.
#[inline]
pub fn prev_token(&self) -> Option<&SyntaxToken<L, D>> { pub fn prev_token(&self) -> Option<&SyntaxToken<L, D>> {
match self.prev_sibling_or_token() { match self.prev_sibling_or_token() {
Some(element) => element.last_token(), Some(element) => element.last_token(),

View file

@ -1,12 +1,13 @@
use crate::{build_recursive, build_tree_with_cache, ResolvedNode}; use crate::{build_recursive, build_tree_with_cache, ResolvedNode};
use super::{Element, SyntaxNode}; use super::{Element, SyntaxNode};
use cstree::{interning::IntoResolver, GreenNodeBuilder, NodeCache, NodeOrToken}; use cstree::{
interning::{IntoResolver, Rodeo},
GreenNodeBuilder, NodeCache, NodeOrToken,
};
use serde_test::Token; use serde_test::Token;
use std::fmt; use std::fmt;
type Rodeo = lasso::Rodeo<lasso::Spur, fxhash::FxBuildHasher>;
/// Macro for generating a list of `serde_test::Token`s using a simpler DSL. /// Macro for generating a list of `serde_test::Token`s using a simpler DSL.
macro_rules! event_tokens { macro_rules! event_tokens {
($($name:ident($($token:tt)*)),*) => { ($($name:ident($($token:tt)*)),*) => {