mirror of
https://github.com/RGBCube/cstree
synced 2025-07-27 09:07:44 +00:00
Use 32 bits for RawSyntaxKind
internally (#49)
This commit is contained in:
parent
8750498b7a
commit
2aa543036f
11 changed files with 42 additions and 39 deletions
|
@ -11,6 +11,7 @@
|
|||
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
|
||||
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
|
||||
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
|
||||
* `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be adjusted to use `#[repr(u32)]` and to update the corresponding conversions accordingly.
|
||||
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
|
||||
* `cstree`
|
||||
* `Language`
|
||||
|
|
|
@ -16,7 +16,7 @@ pub enum Element<'s> {
|
|||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum TestKind {
|
||||
Element { n: u16 },
|
||||
Element { n: u32 },
|
||||
Plus,
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,7 @@ impl<T: Bool> Language for TestLang<T> {
|
|||
type Kind = TestKind;
|
||||
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
if raw.0 == u16::MAX - 1 {
|
||||
if raw.0 == u32::MAX - 1 {
|
||||
TestKind::Plus
|
||||
} else {
|
||||
TestKind::Element { n: raw.0 }
|
||||
|
@ -55,7 +55,7 @@ impl<T: Bool> Language for TestLang<T> {
|
|||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
match kind {
|
||||
TestKind::Element { n } => RawSyntaxKind(n),
|
||||
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
|
||||
TestKind::Plus => RawSyntaxKind(u32::MAX - 1),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,8 +85,8 @@ where
|
|||
pub fn build_recursive<T: Bool, I>(
|
||||
root: &Element<'_>,
|
||||
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
|
||||
mut from: u16,
|
||||
) -> u16
|
||||
mut from: u32,
|
||||
) -> u32
|
||||
where
|
||||
I: Interner,
|
||||
{
|
||||
|
|
|
@ -17,7 +17,7 @@ use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
|
|||
use std::iter::Peekable;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
#[repr(u32)]
|
||||
enum SyntaxKind {
|
||||
Whitespace = 0,
|
||||
|
||||
|
@ -35,7 +35,7 @@ use SyntaxKind::*;
|
|||
|
||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||
fn from(kind: SyntaxKind) -> Self {
|
||||
Self(kind as u16)
|
||||
Self(kind as u32)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
|
|||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
assert!(raw.0 <= Root as u32);
|
||||
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
|
|
|
@ -7,7 +7,7 @@ use cstree::{
|
|||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
#[repr(u32)]
|
||||
pub enum SyntaxKind {
|
||||
/* Tokens */
|
||||
Int, // 42
|
||||
|
@ -42,7 +42,7 @@ impl Language for Calculator {
|
|||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
RawSyntaxKind(kind as u16)
|
||||
RawSyntaxKind(kind as u32)
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
/// Let's start with defining all kinds of tokens and composite nodes.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
#[repr(u32)]
|
||||
pub enum SyntaxKind {
|
||||
LParen = 0, // '('
|
||||
RParen, // ')'
|
||||
|
@ -32,7 +32,7 @@ use SyntaxKind::*;
|
|||
/// First, to easily pass the enum variants into cstree via `.into()`:
|
||||
impl From<SyntaxKind> for cstree::RawSyntaxKind {
|
||||
fn from(kind: SyntaxKind) -> Self {
|
||||
Self(kind as u16)
|
||||
Self(kind as u32)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
|
|||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
assert!(raw.0 <= Root as u32);
|
||||
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
|
@ -421,7 +421,7 @@ nan
|
|||
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
|
||||
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
|
||||
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
|
||||
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
|
||||
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0 as u16)
|
||||
}
|
||||
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
|
||||
match t.0 {
|
||||
|
|
24
src/lib.rs
24
src/lib.rs
|
@ -58,12 +58,12 @@
|
|||
//!
|
||||
//! First, we need to list the different parts of our language's grammar.
|
||||
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
|
||||
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
|
||||
//! The `enum` needs to be convertible to a `u32`, so we use the `repr` attribute to ensure it uses the correct
|
||||
//! representation.
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
//! #[repr(u16)]
|
||||
//! #[repr(u32)]
|
||||
//! enum SyntaxKind {
|
||||
//! /* Tokens */
|
||||
//! Int, // 42
|
||||
|
@ -112,7 +112,7 @@
|
|||
//! }
|
||||
//!
|
||||
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
//! RawSyntaxKind(kind as u16)
|
||||
//! RawSyntaxKind(kind as u32)
|
||||
//! }
|
||||
//!
|
||||
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
@ -368,7 +368,7 @@ use std::fmt;
|
|||
|
||||
/// `RawSyntaxKind` is a type tag for each token or node.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct RawSyntaxKind(pub u16);
|
||||
pub struct RawSyntaxKind(pub u32);
|
||||
|
||||
/// Typesafe representations of text ranges and sizes.
|
||||
pub mod text {
|
||||
|
@ -423,7 +423,7 @@ pub mod sync {
|
|||
/// ```
|
||||
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
/// # #[allow(non_camel_case_types)]
|
||||
/// #[repr(u16)]
|
||||
/// #[repr(u32)]
|
||||
/// enum SyntaxKind {
|
||||
/// Plus, // `+`
|
||||
/// Minus, // `-`
|
||||
|
@ -442,12 +442,12 @@ pub mod sync {
|
|||
/// type Kind = SyntaxKind;
|
||||
///
|
||||
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
/// assert!(raw.0 <= __LAST as u16);
|
||||
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
/// assert!(raw.0 <= __LAST as u32);
|
||||
/// unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||
/// }
|
||||
///
|
||||
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
/// cstree::RawSyntaxKind(kind as u16)
|
||||
/// cstree::RawSyntaxKind(kind as u32)
|
||||
/// }
|
||||
///
|
||||
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
@ -489,7 +489,7 @@ pub mod testing {
|
|||
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
#[repr(u32)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum TestSyntaxKind {
|
||||
Plus,
|
||||
|
@ -510,12 +510,12 @@ pub mod testing {
|
|||
type Kind = TestSyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
|
||||
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
|
||||
assert!(raw.0 <= TestSyntaxKind::__LAST as u32);
|
||||
unsafe { std::mem::transmute::<u32, TestSyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
|
||||
RawSyntaxKind(kind as u16)
|
||||
RawSyntaxKind(kind as u32)
|
||||
}
|
||||
|
||||
fn static_text(kind: Self::Kind) -> Option<&'static str> {
|
||||
|
|
|
@ -244,7 +244,7 @@ impl Serialize for RawSyntaxKind {
|
|||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
serializer.serialize_u16(self.0)
|
||||
serializer.serialize_u32(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -253,6 +253,6 @@ impl<'de> Deserialize<'de> for RawSyntaxKind {
|
|||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
Ok(Self(u16::deserialize(deserializer)?))
|
||||
Ok(Self(u32::deserialize(deserializer)?))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,9 @@ mod tests {
|
|||
use std::mem::size_of;
|
||||
|
||||
assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>());
|
||||
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>()); // verify niche opt of `NonNull`
|
||||
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<*const u8>() + size_of::<u32>() * 2);
|
||||
// verify niche opt of `NonNull`
|
||||
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>());
|
||||
// parent + child index + text len
|
||||
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<SyntaxNode<TestLang>>() + size_of::<u32>() * 2);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ where
|
|||
node
|
||||
}
|
||||
|
||||
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
|
||||
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u32) -> u32
|
||||
where
|
||||
L: Language<Kind = RawSyntaxKind>,
|
||||
I: Interner,
|
||||
|
|
|
@ -6,7 +6,7 @@ fn empty_tree_arc() {
|
|||
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
#[repr(u16)]
|
||||
#[repr(u32)]
|
||||
enum SyntaxKind {
|
||||
Root,
|
||||
}
|
||||
|
@ -17,12 +17,12 @@ fn empty_tree_arc() {
|
|||
type Kind = SyntaxKind;
|
||||
|
||||
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
|
||||
assert!(raw.0 <= SyntaxKind::Root as u16);
|
||||
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
|
||||
assert!(raw.0 <= SyntaxKind::Root as u32);
|
||||
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
|
||||
}
|
||||
|
||||
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
|
||||
cstree::RawSyntaxKind(kind as u16)
|
||||
cstree::RawSyntaxKind(kind as u32)
|
||||
}
|
||||
|
||||
fn static_text(_kind: Self::Kind) -> Option<&'static str> {
|
||||
|
|
|
@ -26,7 +26,7 @@ macro_rules! event_tokens {
|
|||
Token::BorrowedStr("Token"),
|
||||
Token::BorrowedStr("c"),
|
||||
Token::Tuple { len: 2 },
|
||||
Token::U16($kind),
|
||||
Token::U32($kind),
|
||||
Token::BorrowedStr($str),
|
||||
Token::TupleEnd,
|
||||
Token::StructEnd,
|
||||
|
@ -40,7 +40,7 @@ macro_rules! event_tokens {
|
|||
Token::BorrowedStr("EnterNode"),
|
||||
Token::BorrowedStr("c"),
|
||||
Token::Tuple { len: 2 },
|
||||
Token::U16($kind),
|
||||
Token::U32($kind),
|
||||
Token::Bool($data),
|
||||
Token::TupleEnd,
|
||||
Token::StructEnd,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue