mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 09:07:44 +00:00
newtype default interner
This commit is contained in:
parent
d8ce241cf5
commit
fb41635961
13 changed files with 164 additions and 29 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -95,9 +95,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lasso"
|
name = "lasso"
|
||||||
version = "0.4.1"
|
version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3"
|
checksum = "4efb7b456e95cc1ae2de7b18b1e4d791467b46f0a3d02464e5a16ea502091640"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
|
@ -9,7 +9,7 @@ repository = "https://github.com/domenicquirl/cstree"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
lasso = "0.4.1"
|
lasso = "0.5"
|
||||||
text-size = "1.0.0"
|
text-size = "1.0.0"
|
||||||
fxhash= "0.2.1"
|
fxhash= "0.2.1"
|
||||||
servo_arc = { path = "vendor/servo_arc" }
|
servo_arc = { path = "vendor/servo_arc" }
|
||||||
|
|
|
@ -13,7 +13,10 @@
|
||||||
//! - "+" Token(Add)
|
//! - "+" Token(Add)
|
||||||
//! - "4" Token(Number)
|
//! - "4" Token(Number)
|
||||||
|
|
||||||
use cstree::{interning::Resolver, GreenNodeBuilder, NodeOrToken};
|
use cstree::{
|
||||||
|
interning::{IntoResolver, Resolver},
|
||||||
|
GreenNodeBuilder, NodeOrToken,
|
||||||
|
};
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
|
|
@ -59,7 +59,10 @@ impl cstree::Language for Lang {
|
||||||
/// offsets and parent pointers.
|
/// offsets and parent pointers.
|
||||||
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
||||||
/// the Resolver to get the real text back from the interned representation.
|
/// the Resolver to get the real text back from the interned representation.
|
||||||
use cstree::{interning::Resolver, GreenNode};
|
use cstree::{
|
||||||
|
interning::{IntoResolver, Resolver},
|
||||||
|
GreenNode,
|
||||||
|
};
|
||||||
|
|
||||||
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
||||||
/// a stack of currently in-progress nodes.
|
/// a stack of currently in-progress nodes.
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
mod builder;
|
mod builder;
|
||||||
mod element;
|
mod element;
|
||||||
|
mod interner;
|
||||||
mod node;
|
mod node;
|
||||||
mod token;
|
mod token;
|
||||||
|
|
||||||
|
@ -12,6 +13,7 @@ use self::element::{GreenElement, PackedGreenElement};
|
||||||
|
|
||||||
pub use self::{
|
pub use self::{
|
||||||
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
|
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
|
||||||
|
interner::TokenInterner,
|
||||||
node::{Children, GreenNode},
|
node::{Children, GreenNode},
|
||||||
token::GreenToken,
|
token::GreenToken,
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
use std::{convert::TryFrom, num::NonZeroUsize};
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
use fxhash::{FxBuildHasher, FxHashMap};
|
use fxhash::FxHashMap;
|
||||||
use lasso::{Capacity, Rodeo, Spur};
|
|
||||||
use text_size::TextSize;
|
use text_size::TextSize;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
|
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
|
||||||
interning::Interner,
|
interning::Interner,
|
||||||
NodeOrToken,
|
NodeOrToken,
|
||||||
};
|
};
|
||||||
|
@ -21,13 +20,13 @@ const CHILDREN_CACHE_THRESHOLD: usize = 3;
|
||||||
/// A `NodeCache` deduplicates identical tokens and small nodes during tree construction.
|
/// A `NodeCache` deduplicates identical tokens and small nodes during tree construction.
|
||||||
/// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`].
|
/// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`].
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> {
|
pub struct NodeCache<'i, I = TokenInterner> {
|
||||||
nodes: FxHashMap<GreenNodeHead, GreenNode>,
|
nodes: FxHashMap<GreenNodeHead, GreenNode>,
|
||||||
tokens: FxHashMap<GreenTokenData, GreenToken>,
|
tokens: FxHashMap<GreenTokenData, GreenToken>,
|
||||||
interner: MaybeOwned<'i, I>,
|
interner: MaybeOwned<'i, I>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
|
impl NodeCache<'static> {
|
||||||
/// Constructs a new, empty cache.
|
/// Constructs a new, empty cache.
|
||||||
///
|
///
|
||||||
/// By default, this will also create a default interner to deduplicate source text (strings) across
|
/// By default, this will also create a default interner to deduplicate source text (strings) across
|
||||||
|
@ -53,11 +52,7 @@ impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
|
||||||
Self {
|
Self {
|
||||||
nodes: FxHashMap::default(),
|
nodes: FxHashMap::default(),
|
||||||
tokens: FxHashMap::default(),
|
tokens: FxHashMap::default(),
|
||||||
interner: MaybeOwned::Owned(Rodeo::with_capacity_and_hasher(
|
interner: MaybeOwned::Owned(TokenInterner::new()),
|
||||||
// capacity values suggested by author of `lasso`
|
|
||||||
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
|
||||||
FxBuildHasher::default(),
|
|
||||||
)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,7 +72,7 @@ where
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::*;
|
/// # use cstree::*;
|
||||||
/// # use lasso::Rodeo;
|
/// use lasso::Rodeo;
|
||||||
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
||||||
/// # const INT: SyntaxKind = SyntaxKind(1);
|
/// # const INT: SyntaxKind = SyntaxKind(1);
|
||||||
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
|
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
|
||||||
|
@ -239,7 +234,7 @@ pub struct Checkpoint(usize);
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::*;
|
/// # use cstree::{*, interning::IntoResolver};
|
||||||
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
||||||
/// # const INT: SyntaxKind = SyntaxKind(1);
|
/// # const INT: SyntaxKind = SyntaxKind(1);
|
||||||
/// let mut builder = GreenNodeBuilder::new();
|
/// let mut builder = GreenNodeBuilder::new();
|
||||||
|
@ -254,13 +249,13 @@ pub struct Checkpoint(usize);
|
||||||
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
|
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> {
|
pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> {
|
||||||
cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
|
cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
|
||||||
parents: Vec<(SyntaxKind, usize)>,
|
parents: Vec<(SyntaxKind, usize)>,
|
||||||
children: Vec<GreenElement>,
|
children: Vec<GreenElement>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GreenNodeBuilder<'static, 'static, Rodeo<Spur, FxBuildHasher>> {
|
impl GreenNodeBuilder<'static, 'static> {
|
||||||
/// Creates new builder with an empty [`NodeCache`].
|
/// Creates new builder with an empty [`NodeCache`].
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
|
123
src/green/interner.rs
Normal file
123
src/green/interner.rs
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
|
use fxhash::FxBuildHasher;
|
||||||
|
use lasso::{Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver, Rodeo, Spur};
|
||||||
|
|
||||||
|
/// The default [`Interner`] used to deduplicate green token strings.
|
||||||
|
pub struct TokenInterner {
|
||||||
|
rodeo: Rodeo<Spur, FxBuildHasher>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenInterner {
|
||||||
|
pub(super) fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
rodeo: Rodeo::with_capacity_and_hasher(
|
||||||
|
// capacity values suggested by author of `lasso`
|
||||||
|
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
||||||
|
FxBuildHasher::default(),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Resolver for TokenInterner {
|
||||||
|
#[inline]
|
||||||
|
fn resolve<'a>(&'a self, key: &Spur) -> &'a str {
|
||||||
|
self.rodeo.resolve(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_resolve<'a>(&'a self, key: &Spur) -> Option<&'a str> {
|
||||||
|
self.rodeo.try_resolve(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
unsafe fn resolve_unchecked<'a>(&'a self, key: &Spur) -> &'a str {
|
||||||
|
self.rodeo.resolve_unchecked(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn contains_key(&self, key: &Spur) -> bool {
|
||||||
|
self.rodeo.contains_key(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.rodeo.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Reader for TokenInterner {
|
||||||
|
#[inline]
|
||||||
|
fn get(&self, val: &str) -> Option<Spur> {
|
||||||
|
self.rodeo.get(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn contains(&self, val: &str) -> bool {
|
||||||
|
self.rodeo.contains(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntoResolver for TokenInterner {
|
||||||
|
type Resolver = <Rodeo<Spur, FxBuildHasher> as IntoResolver>::Resolver;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn into_resolver(self) -> Self::Resolver
|
||||||
|
where
|
||||||
|
Self: 'static,
|
||||||
|
{
|
||||||
|
self.rodeo.into_resolver()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
|
||||||
|
where
|
||||||
|
Self: 'static,
|
||||||
|
{
|
||||||
|
Rodeo::<Spur, FxBuildHasher>::into_resolver_boxed(Box::new(self.rodeo))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Interner for TokenInterner {
|
||||||
|
#[inline]
|
||||||
|
fn get_or_intern(&mut self, val: &str) -> Spur {
|
||||||
|
self.rodeo.get_or_intern(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Spur> {
|
||||||
|
self.rodeo.try_get_or_intern(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn get_or_intern_static(&mut self, val: &'static str) -> Spur {
|
||||||
|
self.rodeo.get_or_intern_static(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Spur> {
|
||||||
|
self.rodeo.try_get_or_intern_static(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntoReader for TokenInterner {
|
||||||
|
type Reader = <Rodeo<Spur, FxBuildHasher> as IntoReader>::Reader;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn into_reader(self) -> Self::Reader
|
||||||
|
where
|
||||||
|
Self: 'static,
|
||||||
|
{
|
||||||
|
self.rodeo.into_reader()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_reader_boxed(self: Box<Self>) -> Self::Reader
|
||||||
|
where
|
||||||
|
Self: 'static,
|
||||||
|
{
|
||||||
|
Rodeo::<Spur, FxBuildHasher>::into_reader_boxed(Box::new(self.rodeo))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marker impl with an empty body: `TokenInterner` implements both `IntoReader` and `IntoResolver`
// above, and this opts it into the combined `IntoReaderAndResolver` trait.
impl IntoReaderAndResolver for TokenInterner {}
|
|
@ -63,7 +63,8 @@ mod utility_types;
|
||||||
|
|
||||||
/// Types and Traits for efficient String storage and deduplication.
|
/// Types and Traits for efficient String storage and deduplication.
|
||||||
pub mod interning {
|
pub mod interning {
|
||||||
pub use lasso::{Interner, Reader, Resolver};
|
pub use crate::green::TokenInterner;
|
||||||
|
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
|
||||||
}
|
}
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
//! Serialization and Deserialization for syntax trees.
|
//! Serialization and Deserialization for syntax trees.
|
||||||
|
|
||||||
use crate::{interning::Resolver, GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent};
|
use crate::{
|
||||||
|
interning::{IntoResolver, Resolver},
|
||||||
|
GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent,
|
||||||
|
};
|
||||||
use serde::{
|
use serde::{
|
||||||
de::{Error, SeqAccess, Visitor},
|
de::{Error, SeqAccess, Visitor},
|
||||||
ser::SerializeTuple,
|
ser::SerializeTuple,
|
||||||
|
|
|
@ -443,7 +443,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::*;
|
/// # use cstree::{*, interning::TokenInterner};
|
||||||
/// # #[allow(non_camel_case_types)]
|
/// # #[allow(non_camel_case_types)]
|
||||||
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
/// #[repr(u16)]
|
/// #[repr(u16)]
|
||||||
|
@ -468,7 +468,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
|
||||||
/// }
|
/// }
|
||||||
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
|
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
|
||||||
/// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1);
|
/// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1);
|
||||||
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), lasso::Rodeo<lasso::Spur, fxhash::FxBuildHasher>>;
|
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), TokenInterner>;
|
||||||
/// let mut builder = GreenNodeBuilder::new();
|
/// let mut builder = GreenNodeBuilder::new();
|
||||||
/// builder.start_node(ROOT);
|
/// builder.start_node(ROOT);
|
||||||
/// builder.token(TOKEN, "content");
|
/// builder.token(TOKEN, "content");
|
||||||
|
|
|
@ -13,7 +13,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// # use cstree::*;
|
/// # use cstree::{*, interning::IntoResolver};
|
||||||
/// # #[allow(non_camel_case_types)]
|
/// # #[allow(non_camel_case_types)]
|
||||||
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
/// # #[repr(u16)]
|
/// # #[repr(u16)]
|
||||||
|
|
|
@ -7,8 +7,10 @@ use crossbeam_utils::thread::scope;
|
||||||
use std::{thread, time::Duration};
|
use std::{thread, time::Duration};
|
||||||
|
|
||||||
use common::{build_recursive, Element, SyntaxNode};
|
use common::{build_recursive, Element, SyntaxNode};
|
||||||
use cstree::GreenNodeBuilder;
|
use cstree::{
|
||||||
use lasso::Resolver;
|
interning::{IntoResolver, Resolver},
|
||||||
|
GreenNodeBuilder,
|
||||||
|
};
|
||||||
|
|
||||||
fn build_tree<D>(root: &Element<'_>) -> SyntaxNode<D, impl Resolver> {
|
fn build_tree<D>(root: &Element<'_>) -> SyntaxNode<D, impl Resolver> {
|
||||||
let mut builder = GreenNodeBuilder::new();
|
let mut builder = GreenNodeBuilder::new();
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
#![cfg(feature = "serde1")]
|
#![cfg(feature = "serde1")]
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
mod common;
|
mod common;
|
||||||
|
|
||||||
use common::{Element, SyntaxNode};
|
use common::{Element, SyntaxNode};
|
||||||
use cstree::{GreenNodeBuilder, NodeCache, NodeOrToken};
|
use cstree::{
|
||||||
use lasso::Resolver;
|
interning::{IntoResolver, Resolver},
|
||||||
|
GreenNodeBuilder, NodeCache, NodeOrToken,
|
||||||
|
};
|
||||||
use serde_test::Token;
|
use serde_test::Token;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue