1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

document the green module

This commit is contained in:
Domenic Quirl 2021-02-11 17:28:13 +01:00
parent d5d3f7afa8
commit f711f00973
7 changed files with 120 additions and 40 deletions

View file

@ -1,3 +1,7 @@
//! Implementation of the inner, "green" tree.
//! The [`GreenNodeBuilder`] is the main entry point to constructing [`GreenNode`]s and
//! [`GreenToken`]s.
mod builder; mod builder;
mod element; mod element;
mod node; mod node;

View file

@ -18,6 +18,8 @@ use super::{node::GreenNodeHead, token::GreenTokenData};
/// this node into the cache. /// this node into the cache.
const CHILDREN_CACHE_THRESHOLD: usize = 3; const CHILDREN_CACHE_THRESHOLD: usize = 3;
/// A `NodeCache` deduplicates identical tokens and small nodes during tree construction.
/// You can re-use the same cache for multiple similar trees with [`GreenNodeBuilder::with_cache`].
#[derive(Debug)] #[derive(Debug)]
pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> { pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> {
nodes: FxHashMap<GreenNodeHead, GreenNode>, nodes: FxHashMap<GreenNodeHead, GreenNode>,
@ -26,6 +28,27 @@ pub struct NodeCache<'i, I = Rodeo<Spur, FxBuildHasher>> {
} }
impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> { impl NodeCache<'static, Rodeo<Spur, FxBuildHasher>> {
/// Constructs a new, empty cache.
///
/// By default, this will also create a default interner to deduplicate source text (strings) across
/// tokens. To re-use an existing interner, see [`with_interner`](NodeCache::with_interner).
/// # Examples
/// ```
/// # use cstree::*;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder, s: &str) {}
/// let mut cache = NodeCache::new();
/// let mut builder = GreenNodeBuilder::with_cache(&mut cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, _) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// ```
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
nodes: FxHashMap::default(), nodes: FxHashMap::default(),
@ -49,6 +72,27 @@ impl<'i, I> NodeCache<'i, I>
where where
I: Interner, I: Interner,
{ {
/// Constructs a new, empty cache that will use the given interner to deduplicate source text
/// (strings) across tokens.
/// # Examples
/// ```
/// # use cstree::*;
/// # use lasso::Rodeo;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// # fn parse(b: &mut GreenNodeBuilder<Rodeo>, s: &str) {}
/// let mut interner = Rodeo::new();
/// let mut cache = NodeCache::with_interner(&mut interner);
/// let mut builder = GreenNodeBuilder::with_cache(&mut cache);
/// # builder.start_node(ROOT);
/// # builder.token(INT, "42");
/// # builder.finish_node();
/// parse(&mut builder, "42");
/// let (tree, _) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// ```
pub fn with_interner(interner: &'i mut I) -> Self { pub fn with_interner(interner: &'i mut I) -> Self {
Self { Self {
nodes: FxHashMap::default(), nodes: FxHashMap::default(),
@ -183,11 +227,32 @@ impl<T: Default> Default for MaybeOwned<'_, T> {
} }
} }
/// A checkpoint for maybe wrapping a node. See `GreenNodeBuilder::checkpoint` for details. /// A checkpoint for maybe wrapping a node. See [`GreenNodeBuilder::checkpoint`] for details.
#[derive(Clone, Copy, Debug)] #[derive(Clone, Copy, Debug)]
pub struct Checkpoint(usize); pub struct Checkpoint(usize);
/// A builder for a green tree. /// A builder for green trees.
/// Construct with [`new`](GreenNodeBuilder::new) or [`with_cache`](GreenNodeBuilder::with_cache). To
/// add tree nodes, start them with [`start_node`](GreenNodeBuilder::start_node), add
/// [`token`](GreenNodeBuilder::token)s and then [`finish_node`](GreenNodeBuilder::finish_node). When
/// the whole tree is constructed, call [`finish`](GreenNodeBuilder::finish) to obtain the root.
///
/// # Examples
/// ```
/// # use cstree::*;
/// # const ROOT: SyntaxKind = SyntaxKind(0);
/// # const INT: SyntaxKind = SyntaxKind(1);
/// let mut builder = GreenNodeBuilder::new();
/// builder.start_node(ROOT);
/// builder.token(INT, "42");
/// builder.finish_node();
/// let (tree, interner) = builder.finish();
/// assert_eq!(tree.kind(), ROOT);
/// let int = tree.children().next().unwrap();
/// assert_eq!(int.kind(), INT);
/// let resolver = interner.unwrap().into_resolver();
/// assert_eq!(int.as_token().unwrap().text(&resolver), "42");
/// ```
#[derive(Debug)] #[derive(Debug)]
pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> { pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> {
cache: MaybeOwned<'cache, NodeCache<'interner, I>>, cache: MaybeOwned<'cache, NodeCache<'interner, I>>,
@ -196,7 +261,7 @@ pub struct GreenNodeBuilder<'cache, 'interner, I = Rodeo<Spur, FxBuildHasher>> {
} }
impl GreenNodeBuilder<'static, 'static, Rodeo<Spur, FxBuildHasher>> { impl GreenNodeBuilder<'static, 'static, Rodeo<Spur, FxBuildHasher>> {
/// Creates new builder. /// Creates a new builder with an empty [`NodeCache`].
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
cache: MaybeOwned::Owned(NodeCache::new()), cache: MaybeOwned::Owned(NodeCache::new()),
@ -216,8 +281,8 @@ impl<'cache, 'interner, I> GreenNodeBuilder<'cache, 'interner, I>
where where
I: Interner, I: Interner,
{ {
/// Reusing `NodeCache` between different `GreenNodeBuilder`s saves memory. /// Reusing a [`NodeCache`] between multiple builders saves memory, as it allows them to structurally
/// It allows to structurally share underlying trees. /// share underlying trees.
pub fn with_cache(cache: &'cache mut NodeCache<'interner, I>) -> Self { pub fn with_cache(cache: &'cache mut NodeCache<'interner, I>) -> Self {
Self { Self {
cache: MaybeOwned::Borrowed(cache), cache: MaybeOwned::Borrowed(cache),
@ -226,22 +291,21 @@ where
} }
} }
/// Adds new token to the current branch. /// Add a new token to the current branch.
#[inline] #[inline]
pub fn token(&mut self, kind: SyntaxKind, text: &str) { pub fn token(&mut self, kind: SyntaxKind, text: &str) {
let token = self.cache.token(kind, text); let token = self.cache.token(kind, text);
self.children.push(token.into()); self.children.push(token.into());
} }
/// Start new node and make it current. /// Start a new node of the given `kind` and make it current.
#[inline] #[inline]
pub fn start_node(&mut self, kind: SyntaxKind) { pub fn start_node(&mut self, kind: SyntaxKind) {
let len = self.children.len(); let len = self.children.len();
self.parents.push((kind, len)); self.parents.push((kind, len));
} }
/// Finish current branch and restore previous /// Finish the current branch and restore the previous branch as current.
/// branch as current.
#[inline] #[inline]
pub fn finish_node(&mut self) { pub fn finish_node(&mut self) {
let (kind, first_child) = self.parents.pop().unwrap(); let (kind, first_child) = self.parents.pop().unwrap();
@ -250,12 +314,13 @@ where
self.children.push(node.into()); self.children.push(node.into());
} }
/// Prepare for maybe wrapping the next node. /// Prepare for maybe wrapping the next node with a surrounding node.
/// The way wrapping works is that you first of all get a checkpoint, ///
/// then you place all tokens you want to wrap, and then *maybe* call /// The way wrapping works is that you first get a checkpoint, then you add nodes and tokens as
/// `start_node_at`. /// normal, and then you *maybe* call [`start_node_at`](GreenNodeBuilder::start_node_at).
/// Example: ///
/// ```rust /// # Examples
/// ```
/// # use cstree::{GreenNodeBuilder, SyntaxKind}; /// # use cstree::{GreenNodeBuilder, SyntaxKind};
/// # const PLUS: SyntaxKind = SyntaxKind(0); /// # const PLUS: SyntaxKind = SyntaxKind(0);
/// # const OPERATION: SyntaxKind = SyntaxKind(1); /// # const OPERATION: SyntaxKind = SyntaxKind(1);
@ -280,8 +345,8 @@ where
Checkpoint(self.children.len()) Checkpoint(self.children.len())
} }
/// Wrap the previous branch marked by `checkpoint` in a new branch and /// Wrap the previous branch marked by [`checkpoint`](GreenNodeBuilder::checkpoint) in a new
/// make it current. /// branch and make it current.
#[inline] #[inline]
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) { pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
let Checkpoint(checkpoint) = checkpoint; let Checkpoint(checkpoint) = checkpoint;
@ -300,9 +365,16 @@ where
self.parents.push((kind, checkpoint)); self.parents.push((kind, checkpoint));
} }
/// Complete tree building. Make sure that /// Complete building the tree.
/// `start_node_at` and `finish_node` calls ///
/// are paired! /// Make sure that calls to [`start_node`](GreenNodeBuilder::start_node) /
/// [`start_node_at`](GreenNodeBuilder::start_node_at) and
/// [`finish_node`](GreenNodeBuilder::finish_node) are balanced, i.e. that every started node has
/// been completed!
///
/// If this builder was constructed with [`new`](GreenNodeBuilder::new), this method returns the
/// interner used to deduplicate source text (strings) as its second return value to allow
/// resolving tree tokens back to text and re-using the interner to build additional trees.
#[inline] #[inline]
pub fn finish(mut self) -> (GreenNode, Option<I>) { pub fn finish(mut self) -> (GreenNode, Option<I>) {
assert_eq!(self.children.len(), 1); assert_eq!(self.children.len(), 1);

View file

@ -1,6 +1,6 @@
use std::{fmt, hash, mem}; use std::{fmt, hash, mem};
// NOTE: From `thin_dst`: // NOTE from `thin_dst`:
// This MUST be size=1 such that pointer math actually advances the pointer. // This MUST be size=1 such that pointer math actually advances the pointer.
type ErasedPtr = *const u8; type ErasedPtr = *const u8;

View file

@ -12,7 +12,7 @@ use crate::{
TextSize, TextSize,
}; };
#[repr(align(2))] // NB: this is an at-least annotation #[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(super) struct GreenNodeHead { pub(super) struct GreenNodeHead {
kind: SyntaxKind, kind: SyntaxKind,
@ -40,8 +40,8 @@ impl GreenNodeHead {
} }
} }
/// Internal node in the immutable tree. /// Internal node in the immutable "green" tree.
/// It has other nodes and tokens as children. /// It contains other nodes and tokens as its children.
#[derive(Clone)] #[derive(Clone)]
pub struct GreenNode { pub struct GreenNode {
pub(super) data: ThinArc<GreenNodeHead, PackedGreenElement>, pub(super) data: ThinArc<GreenNodeHead, PackedGreenElement>,
@ -54,7 +54,7 @@ impl std::fmt::Debug for GreenNode {
} }
impl GreenNode { impl GreenNode {
/// Creates new Node. /// Creates a new Node.
#[inline] #[inline]
pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode
where where
@ -103,19 +103,19 @@ impl GreenNode {
} }
} }
/// Kind of this node. /// [`SyntaxKind`] of this node.
#[inline] #[inline]
pub fn kind(&self) -> SyntaxKind { pub fn kind(&self) -> SyntaxKind {
self.data.header.header.kind self.data.header.header.kind
} }
/// Returns the length of the text covered by this node. /// Returns the length of text covered by this node.
#[inline] #[inline]
pub fn text_len(&self) -> TextSize { pub fn text_len(&self) -> TextSize {
self.data.header.header.text_len self.data.header.header.text_len
} }
/// Children of this node. /// Iterator over all children of this node.
#[inline] #[inline]
pub fn children(&self) -> Children<'_> { pub fn children(&self) -> Children<'_> {
Children { Children {
@ -139,6 +139,7 @@ impl PartialEq for GreenNode {
impl Eq for GreenNode {} impl Eq for GreenNode {}
/// An iterator over a [`GreenNode`]'s children.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Children<'a> { pub struct Children<'a> {
inner: slice::Iter<'a, PackedGreenElement>, inner: slice::Iter<'a, PackedGreenElement>,

View file

@ -4,15 +4,15 @@ use std::{fmt, hash, mem::ManuallyDrop, ptr};
use crate::{green::SyntaxKind, interning::Resolver, TextSize}; use crate::{green::SyntaxKind, interning::Resolver, TextSize};
use lasso::Spur; use lasso::Spur;
#[repr(align(2))] // NB: this is an at-least annotation #[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub struct GreenTokenData { pub(super) struct GreenTokenData {
pub kind: SyntaxKind, pub(super) kind: SyntaxKind,
pub text: Spur, pub(super) text: Spur,
pub text_len: TextSize, pub(super) text_len: TextSize,
} }
/// Leaf node in the immutable tree. /// Leaf node in the immutable "green" tree.
pub struct GreenToken { pub struct GreenToken {
ptr: ptr::NonNull<GreenTokenData>, ptr: ptr::NonNull<GreenTokenData>,
} }
@ -39,9 +39,9 @@ impl GreenToken {
unsafe { &*Self::remove_tag(self.ptr).as_ptr() } unsafe { &*Self::remove_tag(self.ptr).as_ptr() }
} }
/// Creates new Token. /// Creates a new Token.
#[inline] #[inline]
pub fn new(data: GreenTokenData) -> GreenToken { pub(super) fn new(data: GreenTokenData) -> GreenToken {
let ptr = Arc::into_raw(Arc::new(data)); let ptr = Arc::into_raw(Arc::new(data));
let ptr = ptr::NonNull::new(ptr as *mut _).unwrap(); let ptr = ptr::NonNull::new(ptr as *mut _).unwrap();
GreenToken { GreenToken {
@ -49,13 +49,13 @@ impl GreenToken {
} }
} }
/// Kind of this Token. /// [`SyntaxKind`] of this Token.
#[inline] #[inline]
pub fn kind(&self) -> SyntaxKind { pub fn kind(&self) -> SyntaxKind {
self.data().kind self.data().kind
} }
/// Text of this Token. /// The original source text of this Token.
#[inline] #[inline]
pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str
where where
@ -64,7 +64,7 @@ impl GreenToken {
resolver.resolve(&self.data().text) resolver.resolve(&self.data().text)
} }
/// Returns the length of the text covered by this token. /// Returns the length of text covered by this token.
#[inline] #[inline]
pub fn text_len(&self) -> TextSize { pub fn text_len(&self) -> TextSize {
self.data().text_len self.data().text_len

View file

@ -47,7 +47,7 @@
// missing_debug_implementations, // missing_debug_implementations,
unconditional_recursion, unconditional_recursion,
future_incompatible, future_incompatible,
// missing_docs, //missing_docs,
)] )]
#![deny(unsafe_code)] #![deny(unsafe_code)]

View file

@ -1,3 +1,6 @@
/// Convenience type to represent tree elements which may either be a node or a token.
///
/// Used for both the red and the green tree, for references to elements, and so on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NodeOrToken<N, T> { pub enum NodeOrToken<N, T> {
Node(N), Node(N),