mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 09:07:44 +00:00
newtype default interner
This commit is contained in:
parent
d8ce241cf5
commit
fb41635961
13 changed files with 164 additions and 29 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -95,9 +95,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
|
|||
|
||||
[[package]]
|
||||
name = "lasso"
|
||||
version = "0.4.1"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3"
|
||||
checksum = "4efb7b456e95cc1ae2de7b18b1e4d791467b46f0a3d02464e5a16ea502091640"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
|
|
@ -9,7 +9,7 @@ repository = "https://github.com/domenicquirl/cstree"
|
|||
readme = "README.md"
|
||||
|
||||
[dependencies]
|
||||
lasso = "0.4.1"
|
||||
lasso = "0.5"
|
||||
text-size = "1.0.0"
|
||||
fxhash= "0.2.1"
|
||||
servo_arc = { path = "vendor/servo_arc" }
|
||||
|
|
|
@ -13,7 +13,10 @@
|
|||
//! - "+" Token(Add)
|
||||
//! - "4" Token(Number)
|
||||
|
||||
use cstree::{interning::Resolver, GreenNodeBuilder, NodeOrToken};
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder, NodeOrToken,
|
||||
};
|
||||
use std::iter::Peekable;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
|
|
|
@ -59,7 +59,10 @@ impl cstree::Language for Lang {
|
|||
/// offsets and parent pointers.
|
||||
/// cstree also deduplicates the actual source string in addition to the tree nodes, so we will need
|
||||
/// the Resolver to get the real text back from the interned representation.
|
||||
use cstree::{interning::Resolver, GreenNode};
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNode,
|
||||
};
|
||||
|
||||
/// You can construct GreenNodes by hand, but a builder is helpful for top-down parsers: it maintains
|
||||
/// a stack of currently in-progress nodes.
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
mod builder;
|
||||
mod element;
|
||||
mod interner;
|
||||
mod node;
|
||||
mod token;
|
||||
|
||||
|
@ -12,6 +13,7 @@ use self::element::{GreenElement, PackedGreenElement};
|
|||
|
||||
pub use self::{
|
||||
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
|
||||
interner::TokenInterner,
|
||||
node::{Children, GreenNode},
|
||||
token::GreenToken,
|
||||
};
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
use std::{convert::TryFrom, num::NonZeroUsize};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use fxhash::{FxBuildHasher, FxHashMap};
|
||||
use lasso::{Capacity, Rodeo, Spur};
|
||||
use fxhash::FxHashMap;
|
||||
use text_size::TextSize;
|
||||
|
||||
use crate::{
|
||||
green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
|
||||
green::{interner::TokenInterner, GreenElement, GreenNode, GreenToken, SyntaxKind},
|
||||
interning::Interner,
|
||||
NodeOrToken,
|
||||
};
|
||||
|
@ -21,13 +20,13 @@ const CHILDREN_CACHE_THRESHOLD: usize = 3;
|
|||
/// A `NodeCache` deduplicates identical tokens and small nodes during tree construction.
|
||||
/// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`].
|
||||
#[derive(Debug)]
|
||||
pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> {
|
||||
pub struct NodeCache<'i, I = TokenInterner> {
|
||||
nodes: FxHashMap<GreenNodeHead, GreenNode>,
|
||||
tokens: FxHashMap<GreenTokenData, GreenToken>,
|
||||
interner: MaybeOwned<'i, I>,
|
||||
}
|
||||
|
||||
impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
|
||||
impl NodeCache<'static> {
|
||||
/// Constructs a new, empty cache.
|
||||
///
|
||||
/// By default, this will also create a default interner to deduplicate source text (strings) across
|
||||
|
@ -53,11 +52,7 @@ impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
|
|||
Self {
|
||||
nodes: FxHashMap::default(),
|
||||
tokens: FxHashMap::default(),
|
||||
interner: MaybeOwned::Owned(Rodeo::with_capacity_and_hasher(
|
||||
// capacity values suggested by author of `lasso`
|
||||
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
||||
FxBuildHasher::default(),
|
||||
)),
|
||||
interner: MaybeOwned::Owned(TokenInterner::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -77,7 +72,7 @@ where
|
|||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::*;
|
||||
/// # use lasso::Rodeo;
|
||||
/// use lasso::Rodeo;
|
||||
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
||||
/// # const INT: SyntaxKind = SyntaxKind(1);
|
||||
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
|
||||
|
@ -239,7 +234,7 @@ pub struct Checkpoint(usize);
|
|||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use cstree::*;
|
||||
/// # use cstree::{*, interning::IntoResolver};
|
||||
/// # const ROOT: SyntaxKind = SyntaxKind(0);
|
||||
/// # const INT: SyntaxKind = SyntaxKind(1);
|
||||
/// let mut builder = GreenNodeBuilder::new();
|
||||
|
@ -254,13 +249,13 @@ pub struct Checkpoint(usize);
|
|||
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> {
|
||||
pub struct GreenNodeBuilder<'cache, 'interner, I = TokenInterner> {
|
||||
cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
|
||||
parents: Vec<(SyntaxKind, usize)>,
|
||||
children: Vec<GreenElement>,
|
||||
}
|
||||
|
||||
impl GreenNodeBuilder<'static, 'static, Rodeo<Spur, FxBuildHasher>> {
|
||||
impl GreenNodeBuilder<'static, 'static> {
|
||||
/// Creates new builder with an empty [`NodeCache`].
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
|
|
123
src/green/interner.rs
Normal file
123
src/green/interner.rs
Normal file
|
@ -0,0 +1,123 @@
|
|||
use std::num::NonZeroUsize;
|
||||
|
||||
use fxhash::FxBuildHasher;
|
||||
use lasso::{Capacity, Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver, Rodeo, Spur};
|
||||
|
||||
/// The default [`Interner`] used to deduplicate green token strings.
|
||||
pub struct TokenInterner {
|
||||
rodeo: Rodeo<Spur, FxBuildHasher>,
|
||||
}
|
||||
|
||||
impl TokenInterner {
|
||||
pub(super) fn new() -> Self {
|
||||
Self {
|
||||
rodeo: Rodeo::with_capacity_and_hasher(
|
||||
// capacity values suggested by author of `lasso`
|
||||
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
|
||||
FxBuildHasher::default(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Resolver for TokenInterner {
|
||||
#[inline]
|
||||
fn resolve<'a>(&'a self, key: &Spur) -> &'a str {
|
||||
self.rodeo.resolve(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_resolve<'a>(&'a self, key: &Spur) -> Option<&'a str> {
|
||||
self.rodeo.try_resolve(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn resolve_unchecked<'a>(&'a self, key: &Spur) -> &'a str {
|
||||
self.rodeo.resolve_unchecked(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains_key(&self, key: &Spur) -> bool {
|
||||
self.rodeo.contains_key(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
self.rodeo.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Reader for TokenInterner {
|
||||
#[inline]
|
||||
fn get(&self, val: &str) -> Option<Spur> {
|
||||
self.rodeo.get(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains(&self, val: &str) -> bool {
|
||||
self.rodeo.contains(val)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoResolver for TokenInterner {
|
||||
type Resolver = <Rodeo<Spur, FxBuildHasher> as IntoResolver>::Resolver;
|
||||
|
||||
#[inline]
|
||||
fn into_resolver(self) -> Self::Resolver
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
self.rodeo.into_resolver()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn into_resolver_boxed(self: Box<Self>) -> Self::Resolver
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
Rodeo::<Spur, FxBuildHasher>::into_resolver_boxed(Box::new(self.rodeo))
|
||||
}
|
||||
}
|
||||
|
||||
impl Interner for TokenInterner {
|
||||
#[inline]
|
||||
fn get_or_intern(&mut self, val: &str) -> Spur {
|
||||
self.rodeo.get_or_intern(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_get_or_intern(&mut self, val: &str) -> lasso::LassoResult<Spur> {
|
||||
self.rodeo.try_get_or_intern(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_or_intern_static(&mut self, val: &'static str) -> Spur {
|
||||
self.rodeo.get_or_intern_static(val)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn try_get_or_intern_static(&mut self, val: &'static str) -> lasso::LassoResult<Spur> {
|
||||
self.rodeo.try_get_or_intern_static(val)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoReader for TokenInterner {
|
||||
type Reader = <Rodeo<Spur, FxBuildHasher> as IntoReader>::Reader;
|
||||
|
||||
#[inline]
|
||||
fn into_reader(self) -> Self::Reader
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
self.rodeo.into_reader()
|
||||
}
|
||||
|
||||
fn into_reader_boxed(self: Box<Self>) -> Self::Reader
|
||||
where
|
||||
Self: 'static,
|
||||
{
|
||||
Rodeo::<Spur, FxBuildHasher>::into_reader_boxed(Box::new(self.rodeo))
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: no items are defined here for `IntoReaderAndResolver`.
impl IntoReaderAndResolver for TokenInterner {}
|
|
@ -63,7 +63,8 @@ mod utility_types;
|
|||
|
||||
/// Types and Traits for efficient String storage and deduplication.
|
||||
pub mod interning {
|
||||
pub use lasso::{Interner, Reader, Resolver};
|
||||
pub use crate::green::TokenInterner;
|
||||
pub use lasso::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
|
||||
}
|
||||
use std::fmt;
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
//! Serialization and Deserialization for syntax trees.
|
||||
|
||||
use crate::{interning::Resolver, GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent};
|
||||
use crate::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder, Language, NodeOrToken, SyntaxKind, SyntaxNode, WalkEvent,
|
||||
};
|
||||
use serde::{
|
||||
de::{Error, SeqAccess, Visitor},
|
||||
ser::SerializeTuple,
|
||||
|
|
|
@ -443,7 +443,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
|
|||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// # use cstree::*;
|
||||
/// # use cstree::{*, interning::TokenInterner};
|
||||
/// # #[allow(non_camel_case_types)]
|
||||
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
/// #[repr(u16)]
|
||||
|
@ -468,7 +468,7 @@ impl<L: Language, D, R> SyntaxNode<L, D, R> {
|
|||
/// }
|
||||
/// # const ROOT: cstree::SyntaxKind = cstree::SyntaxKind(0);
|
||||
/// # const TOKEN: cstree::SyntaxKind = cstree::SyntaxKind(1);
|
||||
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), lasso::Rodeo<lasso::Spur, fxhash::FxBuildHasher>>;
|
||||
/// # type SyntaxNode<L> = cstree::SyntaxNode<L, (), TokenInterner>;
|
||||
/// let mut builder = GreenNodeBuilder::new();
|
||||
/// builder.start_node(ROOT);
|
||||
/// builder.token(TOKEN, "content");
|
||||
|
|
|
@ -13,7 +13,7 @@ use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, T
|
|||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// # use cstree::*;
|
||||
/// # use cstree::{*, interning::IntoResolver};
|
||||
/// # #[allow(non_camel_case_types)]
|
||||
/// # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
/// # #[repr(u16)]
|
||||
|
|
|
@ -7,8 +7,10 @@ use crossbeam_utils::thread::scope;
|
|||
use std::{thread, time::Duration};
|
||||
|
||||
use common::{build_recursive, Element, SyntaxNode};
|
||||
use cstree::GreenNodeBuilder;
|
||||
use lasso::Resolver;
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder,
|
||||
};
|
||||
|
||||
fn build_tree<D>(root: &Element<'_>) -> SyntaxNode<D, impl Resolver> {
|
||||
let mut builder = GreenNodeBuilder::new();
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
#![cfg(feature = "serde1")]
|
||||
|
||||
#[allow(unused)]
|
||||
mod common;
|
||||
|
||||
use common::{Element, SyntaxNode};
|
||||
use cstree::{GreenNodeBuilder, NodeCache, NodeOrToken};
|
||||
use lasso::Resolver;
|
||||
use cstree::{
|
||||
interning::{IntoResolver, Resolver},
|
||||
GreenNodeBuilder, NodeCache, NodeOrToken,
|
||||
};
|
||||
use serde_test::Token;
|
||||
use std::fmt;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue