diff --git a/CHANGELOG.md b/CHANGELOG.md index caa6ac2..29892dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements. * Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer). * `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations. + * `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be adjusted to `#[repr(u32)]` and the corresponding conversions. * The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. 
Otherwise, the module structure is now as follows: * `cstree` * `Language` diff --git a/benches/main.rs b/benches/main.rs index f1373b5..3f95608 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -16,7 +16,7 @@ pub enum Element<'s> { #[derive(Debug, Clone, Copy)] pub enum TestKind { - Element { n: u16 }, + Element { n: u32 }, Plus, } @@ -45,7 +45,7 @@ impl Language for TestLang { type Kind = TestKind; fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind { - if raw.0 == u16::MAX - 1 { + if raw.0 == u32::MAX - 1 { TestKind::Plus } else { TestKind::Element { n: raw.0 } @@ -55,7 +55,7 @@ impl Language for TestLang { fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { match kind { TestKind::Element { n } => RawSyntaxKind(n), - TestKind::Plus => RawSyntaxKind(u16::MAX - 1), + TestKind::Plus => RawSyntaxKind(u32::MAX - 1), } } @@ -85,8 +85,8 @@ where pub fn build_recursive<I>( root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, TestLang, I>, - mut from: u16, -) -> u16 + mut from: u32, +) -> u32 where I: Interner, { diff --git a/examples/math.rs b/examples/math.rs index e2346ac..3b00957 100644 --- a/examples/math.rs +++ b/examples/math.rs @@ -17,7 +17,7 @@ use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken}; use std::iter::Peekable; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(u16)] +#[repr(u32)] enum SyntaxKind { Whitespace = 0, @@ -35,7 +35,7 @@ use SyntaxKind::*; impl From<SyntaxKind> for cstree::RawSyntaxKind { fn from(kind: SyntaxKind) -> Self { - Self(kind as u16) + Self(kind as u32) } } @@ -45,8 +45,8 @@ impl cstree::Language for Lang { type Kind = SyntaxKind; fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { - assert!(raw.0 <= Root as u16); - unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } + assert!(raw.0 <= Root as u32); + unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) } } fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { diff --git a/examples/readme.rs b/examples/readme.rs index 6e3890d..ff0f083 100644 --- 
a/examples/readme.rs +++ b/examples/readme.rs @@ -7,7 +7,7 @@ use cstree::{ }; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(u16)] +#[repr(u32)] pub enum SyntaxKind { /* Tokens */ Int, // 42 @@ -42,7 +42,7 @@ impl Language for Calculator { } fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { - RawSyntaxKind(kind as u16) + RawSyntaxKind(kind as u32) } fn static_text(kind: Self::Kind) -> Option<&'static str> { diff --git a/examples/s_expressions.rs b/examples/s_expressions.rs index bed6832..3311150 100644 --- a/examples/s_expressions.rs +++ b/examples/s_expressions.rs @@ -9,7 +9,7 @@ /// Let's start with defining all kinds of tokens and composite nodes. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(u16)] +#[repr(u32)] pub enum SyntaxKind { LParen = 0, // '(' RParen, // ')' @@ -32,7 +32,7 @@ use SyntaxKind::*; /// First, to easily pass the enum variants into cstree via `.into()`: impl From<SyntaxKind> for cstree::RawSyntaxKind { fn from(kind: SyntaxKind) -> Self { - Self(kind as u16) + Self(kind as u32) } } @@ -45,8 +45,8 @@ impl cstree::Language for Lang { type Kind = SyntaxKind; fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { - assert!(raw.0 <= Root as u16); - unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } + assert!(raw.0 <= Root as u32); + unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) } } fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { @@ -421,7 +421,7 @@ nan /// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE) fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> { fn tok(t: SyntaxKind) -> m_lexer::TokenKind { - m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0) + m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0 as u16) } fn kind(t: m_lexer::TokenKind) -> SyntaxKind { match t.0 { diff --git a/src/lib.rs b/src/lib.rs index dfa934b..41d33f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,12 +58,12 @@ //! //! 
First, we need to list the different part of our language's grammar. //! We can do that using an `enum` with a unit variant for any terminal and non-terminal. -//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct +//! The `enum` needs to be convertible to a `u32`, so we use the `repr` attribute to ensure it uses the correct //! representation. //! //! ```rust,ignore //! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -//! #[repr(u16)] +//! #[repr(u32)] //! enum SyntaxKind { //! /* Tokens */ //! Int, // 42 @@ -112,7 +112,7 @@ //! } //! //! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { -//! RawSyntaxKind(kind as u16) +//! RawSyntaxKind(kind as u32) //! } //! //! fn static_text(kind: Self::Kind) -> Option<&'static str> { @@ -368,7 +368,7 @@ use std::fmt; /// `RawSyntaxKind` is a type tag for each token or node. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct RawSyntaxKind(pub u16); +pub struct RawSyntaxKind(pub u32); /// Typesafe representations of text ranges and sizes. 
pub mod text { @@ -423,7 +423,7 @@ pub mod sync { /// ``` /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// # #[allow(non_camel_case_types)] -/// #[repr(u16)] +/// #[repr(u32)] /// enum SyntaxKind { /// Plus, // `+` /// Minus, // `-` @@ -442,12 +442,12 @@ pub mod sync { /// type Kind = SyntaxKind; /// /// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { -/// assert!(raw.0 <= __LAST as u16); -/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } +/// assert!(raw.0 <= __LAST as u32); +/// unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) } /// } /// /// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { -/// cstree::RawSyntaxKind(kind as u16) +/// cstree::RawSyntaxKind(kind as u32) /// } /// /// fn static_text(kind: Self::Kind) -> Option<&'static str> { @@ -489,7 +489,7 @@ pub mod testing { pub fn parse(_b: &mut GreenNodeBuilder, _s: &str) {} #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - #[repr(u16)] + #[repr(u32)] #[allow(non_camel_case_types)] pub enum TestSyntaxKind { Plus, @@ -510,12 +510,12 @@ pub mod testing { type Kind = TestSyntaxKind; fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind { - assert!(raw.0 <= TestSyntaxKind::__LAST as u16); - unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) } + assert!(raw.0 <= TestSyntaxKind::__LAST as u32); + unsafe { std::mem::transmute::<u32, TestSyntaxKind>(raw.0) } } fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { - RawSyntaxKind(kind as u16) + RawSyntaxKind(kind as u32) } fn static_text(kind: Self::Kind) -> Option<&'static str> { diff --git a/src/serde_impls.rs b/src/serde_impls.rs index ecf0731..daa8ffb 100644 --- a/src/serde_impls.rs +++ b/src/serde_impls.rs @@ -244,7 +244,7 @@ impl Serialize for RawSyntaxKind { where S: serde::Serializer, { - serializer.serialize_u16(self.0) + serializer.serialize_u32(self.0) } } @@ -253,6 +253,6 @@ impl<'de> Deserialize<'de> for RawSyntaxKind { where D: serde::Deserializer<'de>, { - Ok(Self(u16::deserialize(deserializer)?)) + 
Ok(Self(u32::deserialize(deserializer)?)) } } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index dd274eb..e275c04 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -61,7 +61,9 @@ mod tests { use std::mem::size_of; assert_eq!(size_of::>(), size_of::<*const u8>()); - assert_eq!(size_of::>>(), size_of::<*const u8>()); // verify niche opt of `NonNull` - assert_eq!(size_of::>(), size_of::<*const u8>() + size_of::() * 2); + // verify niche opt of `NonNull` + assert_eq!(size_of::>>(), size_of::<*const u8>()); + // parent + child index + text len + assert_eq!(size_of::>(), size_of::>() + size_of::() * 2); } } diff --git a/tests/it/main.rs b/tests/it/main.rs index c3777c3..256856f 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -56,7 +56,7 @@ where node } -pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16 +pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u32) -> u32 where L: Language, I: Interner, diff --git a/tests/it/regressions.rs b/tests/it/regressions.rs index 12d5294..d65aebd 100644 --- a/tests/it/regressions.rs +++ b/tests/it/regressions.rs @@ -6,7 +6,7 @@ fn empty_tree_arc() { use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode}; #[allow(non_camel_case_types)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - #[repr(u16)] + #[repr(u32)] enum SyntaxKind { Root, } @@ -17,12 +17,12 @@ fn empty_tree_arc() { type Kind = SyntaxKind; fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { - assert!(raw.0 <= SyntaxKind::Root as u16); - unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } + assert!(raw.0 <= SyntaxKind::Root as u32); + unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) } } fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { - cstree::RawSyntaxKind(kind as u16) + cstree::RawSyntaxKind(kind as u32) } fn static_text(_kind: Self::Kind) -> Option<&'static str> { diff --git a/tests/it/serde.rs b/tests/it/serde.rs index 
f8432b2..7f7e42e 100644 --- a/tests/it/serde.rs +++ b/tests/it/serde.rs @@ -26,7 +26,7 @@ macro_rules! event_tokens { Token::BorrowedStr("Token"), Token::BorrowedStr("c"), Token::Tuple { len: 2 }, - Token::U16($kind), + Token::U32($kind), Token::BorrowedStr($str), Token::TupleEnd, Token::StructEnd, @@ -40,7 +40,7 @@ macro_rules! event_tokens { Token::BorrowedStr("EnterNode"), Token::BorrowedStr("c"), Token::Tuple { len: 2 }, - Token::U16($kind), + Token::U32($kind), Token::Bool($data), Token::TupleEnd, Token::StructEnd,