mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 09:07:44 +00:00
Use 32 bits for RawSyntaxKind internally (#49)
This commit is contained in:
parent
8750498b7a
commit
2aa543036f
11 changed files with 42 additions and 39 deletions
|
@ -11,6 +11,7 @@
|
||||||
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
||||||
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
||||||
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
|
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
|
||||||
|
* `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be adjusted to use `#[repr(u32)]` and to use `u32` in the corresponding conversions.
|
||||||
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
|
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
|
||||||
* `cstree`
|
* `cstree`
|
||||||
* `Language`
|
* `Language`
|
||||||
|
|
|
@ -16,7 +16,7 @@ pub enum Element<'s> {
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub enum TestKind {
|
pub enum TestKind {
|
||||||
Element { n: u16 },
|
Element { n: u32 },
|
||||||
Plus,
|
Plus,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ impl<T: Bool> Language for TestLang<T> {
|
||||||
type Kind = TestKind;
|
type Kind = TestKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
if raw.0 == u16::MAX - 1 {
|
if raw.0 == u32::MAX - 1 {
|
||||||
TestKind::Plus
|
TestKind::Plus
|
||||||
} else {
|
} else {
|
||||||
TestKind::Element { n: raw.0 }
|
TestKind::Element { n: raw.0 }
|
||||||
|
@ -55,7 +55,7 @@ impl<T: Bool> Language for TestLang<T> {
|
||||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
match kind {
|
match kind {
|
||||||
TestKind::Element { n } => RawSyntaxKind(n),
|
TestKind::Element { n } => RawSyntaxKind(n),
|
||||||
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
|
TestKind::Plus => RawSyntaxKind(u32::MAX - 1),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,8 +85,8 @@ where
|
||||||
pub fn build_recursive<T: Bool, I>(
|
pub fn build_recursive<T: Bool, I>(
|
||||||
root: &Element<'_>,
|
root: &Element<'_>,
|
||||||
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
|
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
|
||||||
mut from: u16,
|
mut from: u32,
|
||||||
) -> u16
|
) -> u32
|
||||||
where
|
where
|
||||||
I: Interner,
|
I: Interner,
|
||||||
{
|
{
|
||||||
|
|
|
@ -17,7 +17,7 @@ use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u32)]
|
||||||
enum SyntaxKind {
|
enum SyntaxKind {
|
||||||
Whitespace = 0,
|
Whitespace = 0,
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ use SyntaxKind::*;
|
||||||
|
|
||||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||||
fn from(kind: SyntaxKind) -> Self {
|
fn from(kind: SyntaxKind) -> Self {
|
||||||
Self(kind as u16)
|
Self(kind as u32)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= Root as u16);
|
assert!(raw.0 <= Root as u32);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
|
|
|
@ -7,7 +7,7 @@ use cstree::{
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u32)]
|
||||||
pub enum SyntaxKind {
|
pub enum SyntaxKind {
|
||||||
/* Tokens */
|
/* Tokens */
|
||||||
Int, // 42
|
Int, // 42
|
||||||
|
@ -42,7 +42,7 @@ impl Language for Calculator {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
RawSyntaxKind(kind as u16)
|
RawSyntaxKind(kind as u32)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
/// Let's start with defining all kinds of tokens and composite nodes.
|
/// Let's start with defining all kinds of tokens and composite nodes.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u32)]
|
||||||
pub enum SyntaxKind {
|
pub enum SyntaxKind {
|
||||||
LParen = 0, // '('
|
LParen = 0, // '('
|
||||||
RParen, // ')'
|
RParen, // ')'
|
||||||
|
@ -32,7 +32,7 @@ use SyntaxKind::*;
|
||||||
/// First, to easily pass the enum variants into cstree via `.into()`:
|
/// First, to easily pass the enum variants into cstree via `.into()`:
|
||||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||||
fn from(kind: SyntaxKind) -> Self {
|
fn from(kind: SyntaxKind) -> Self {
|
||||||
Self(kind as u16)
|
Self(kind as u32)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= Root as u16);
|
assert!(raw.0 <= Root as u32);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
|
@ -421,7 +421,7 @@ nan
|
||||||
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
||||||
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
||||||
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
||||||
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
|
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0 as u16)
|
||||||
}
|
}
|
||||||
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
||||||
match t.0 {
|
match t.0 {
|
||||||
|
|
24
src/lib.rs
24
src/lib.rs
|
@ -58,12 +58,12 @@
|
||||||
//!
|
//!
|
||||||
//! First, we need to list the different parts of our language's grammar.
|
//! First, we need to list the different parts of our language's grammar.
|
||||||
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||||
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
//! The `enum` needs to be convertible to a `u32`, so we use the `repr` attribute to ensure it uses the correct
|
||||||
//! representation.
|
//! representation.
|
||||||
//!
|
//!
|
||||||
//! ```rust,ignore
|
//! ```rust,ignore
|
||||||
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
//! #[repr(u16)]
|
//! #[repr(u32)]
|
||||||
//! enum SyntaxKind {
|
//! enum SyntaxKind {
|
||||||
//! /* Tokens */
|
//! /* Tokens */
|
||||||
//! Int, // 42
|
//! Int, // 42
|
||||||
|
@ -112,7 +112,7 @@
|
||||||
//! }
|
//! }
|
||||||
//!
|
//!
|
||||||
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
//! RawSyntaxKind(kind as u16)
|
//! RawSyntaxKind(kind as u32)
|
||||||
//! }
|
//! }
|
||||||
//!
|
//!
|
||||||
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
@ -368,7 +368,7 @@ use std::fmt;
|
||||||
|
|
||||||
/// `RawSyntaxKind` is a type tag for each token or node.
|
/// `RawSyntaxKind` is a type tag for each token or node.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
pub struct RawSyntaxKind(pub u16);
|
pub struct RawSyntaxKind(pub u32);
|
||||||
|
|
||||||
/// Typesafe representations of text ranges and sizes.
|
/// Typesafe representations of text ranges and sizes.
|
||||||
pub mod text {
|
pub mod text {
|
||||||
|
@ -423,7 +423,7 @@ pub mod sync {
|
||||||
/// ```
|
/// ```
|
||||||
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
/// # #[allow(non_camel_case_types)]
|
/// # #[allow(non_camel_case_types)]
|
||||||
/// #[repr(u16)]
|
/// #[repr(u32)]
|
||||||
/// enum SyntaxKind {
|
/// enum SyntaxKind {
|
||||||
/// Plus, // `+`
|
/// Plus, // `+`
|
||||||
/// Minus, // `-`
|
/// Minus, // `-`
|
||||||
|
@ -442,12 +442,12 @@ pub mod sync {
|
||||||
/// type Kind = SyntaxKind;
|
/// type Kind = SyntaxKind;
|
||||||
///
|
///
|
||||||
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
/// assert!(raw.0 <= __LAST as u16);
|
/// assert!(raw.0 <= __LAST as u32);
|
||||||
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
/// unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||||
/// }
|
/// }
|
||||||
///
|
///
|
||||||
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
/// cstree::RawSyntaxKind(kind as u16)
|
/// cstree::RawSyntaxKind(kind as u32)
|
||||||
/// }
|
/// }
|
||||||
///
|
///
|
||||||
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
@ -489,7 +489,7 @@ pub mod testing {
|
||||||
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
|
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u32)]
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
pub enum TestSyntaxKind {
|
pub enum TestSyntaxKind {
|
||||||
Plus,
|
Plus,
|
||||||
|
@ -510,12 +510,12 @@ pub mod testing {
|
||||||
type Kind = TestSyntaxKind;
|
type Kind = TestSyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
|
assert!(raw.0 <= TestSyntaxKind::__LAST as u32);
|
||||||
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u32, TestSyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||||
RawSyntaxKind(kind as u16)
|
RawSyntaxKind(kind as u32)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
|
|
@ -244,7 +244,7 @@ impl Serialize for RawSyntaxKind {
|
||||||
where
|
where
|
||||||
S: serde::Serializer,
|
S: serde::Serializer,
|
||||||
{
|
{
|
||||||
serializer.serialize_u16(self.0)
|
serializer.serialize_u32(self.0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -253,6 +253,6 @@ impl<'de> Deserialize<'de> for RawSyntaxKind {
|
||||||
where
|
where
|
||||||
D: serde::Deserializer<'de>,
|
D: serde::Deserializer<'de>,
|
||||||
{
|
{
|
||||||
Ok(Self(u16::deserialize(deserializer)?))
|
Ok(Self(u32::deserialize(deserializer)?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,9 @@ mod tests {
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
|
||||||
assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>());
|
assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>());
|
||||||
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>()); // verify niche opt of `NonNull`
|
// verify niche opt of `NonNull`
|
||||||
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<*const u8>() + size_of::<u32>() * 2);
|
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>());
|
||||||
|
// parent + child index + text len
|
||||||
|
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<SyntaxNode<TestLang>>() + size_of::<u32>() * 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,7 @@ where
|
||||||
node
|
node
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
|
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u32) -> u32
|
||||||
where
|
where
|
||||||
L: Language<Kind = RawSyntaxKind>,
|
L: Language<Kind = RawSyntaxKind>,
|
||||||
I: Interner,
|
I: Interner,
|
||||||
|
|
|
@ -6,7 +6,7 @@ fn empty_tree_arc() {
|
||||||
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
|
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
#[repr(u16)]
|
#[repr(u32)]
|
||||||
enum SyntaxKind {
|
enum SyntaxKind {
|
||||||
Root,
|
Root,
|
||||||
}
|
}
|
||||||
|
@ -17,12 +17,12 @@ fn empty_tree_arc() {
|
||||||
type Kind = SyntaxKind;
|
type Kind = SyntaxKind;
|
||||||
|
|
||||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||||
assert!(raw.0 <= SyntaxKind::Root as u16);
|
assert!(raw.0 <= SyntaxKind::Root as u32);
|
||||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||||
cstree::RawSyntaxKind(kind as u16)
|
cstree::RawSyntaxKind(kind as u32)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
||||||
|
|
|
@ -26,7 +26,7 @@ macro_rules! event_tokens {
|
||||||
Token::BorrowedStr("Token"),
|
Token::BorrowedStr("Token"),
|
||||||
Token::BorrowedStr("c"),
|
Token::BorrowedStr("c"),
|
||||||
Token::Tuple { len: 2 },
|
Token::Tuple { len: 2 },
|
||||||
Token::U16($kind),
|
Token::U32($kind),
|
||||||
Token::BorrowedStr($str),
|
Token::BorrowedStr($str),
|
||||||
Token::TupleEnd,
|
Token::TupleEnd,
|
||||||
Token::StructEnd,
|
Token::StructEnd,
|
||||||
|
@ -40,7 +40,7 @@ macro_rules! event_tokens {
|
||||||
Token::BorrowedStr("EnterNode"),
|
Token::BorrowedStr("EnterNode"),
|
||||||
Token::BorrowedStr("c"),
|
Token::BorrowedStr("c"),
|
||||||
Token::Tuple { len: 2 },
|
Token::Tuple { len: 2 },
|
||||||
Token::U16($kind),
|
Token::U32($kind),
|
||||||
Token::Bool($data),
|
Token::Bool($data),
|
||||||
Token::TupleEnd,
|
Token::TupleEnd,
|
||||||
Token::StructEnd,
|
Token::StructEnd,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue