1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

Use 32 bits for RawSyntaxKind internally (#49)

This commit is contained in:
DQ 2023-04-07 19:18:47 +02:00 committed by GitHub
parent 8750498b7a
commit 2aa543036f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 42 additions and 39 deletions

View file

@ -11,6 +11,7 @@
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements. * Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer). * Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations. * `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
* `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be changed to use `#[repr(u32)]`, and the corresponding conversions need to be updated accordingly (e.g. `kind as u32` instead of `kind as u16`).
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows: * The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
* `cstree` * `cstree`
* `Language` * `Language`

View file

@ -16,7 +16,7 @@ pub enum Element<'s> {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum TestKind { pub enum TestKind {
Element { n: u16 }, Element { n: u32 },
Plus, Plus,
} }
@ -45,7 +45,7 @@ impl<T: Bool> Language for TestLang<T> {
type Kind = TestKind; type Kind = TestKind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind { fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
if raw.0 == u16::MAX - 1 { if raw.0 == u32::MAX - 1 {
TestKind::Plus TestKind::Plus
} else { } else {
TestKind::Element { n: raw.0 } TestKind::Element { n: raw.0 }
@ -55,7 +55,7 @@ impl<T: Bool> Language for TestLang<T> {
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
match kind { match kind {
TestKind::Element { n } => RawSyntaxKind(n), TestKind::Element { n } => RawSyntaxKind(n),
TestKind::Plus => RawSyntaxKind(u16::MAX - 1), TestKind::Plus => RawSyntaxKind(u32::MAX - 1),
} }
} }
@ -85,8 +85,8 @@ where
pub fn build_recursive<T: Bool, I>( pub fn build_recursive<T: Bool, I>(
root: &Element<'_>, root: &Element<'_>,
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>, builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
mut from: u16, mut from: u32,
) -> u16 ) -> u32
where where
I: Interner, I: Interner,
{ {

View file

@ -17,7 +17,7 @@ use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
use std::iter::Peekable; use std::iter::Peekable;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)] #[repr(u32)]
enum SyntaxKind { enum SyntaxKind {
Whitespace = 0, Whitespace = 0,
@ -35,7 +35,7 @@ use SyntaxKind::*;
impl From<SyntaxKind> for cstree::RawSyntaxKind { impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self { fn from(kind: SyntaxKind) -> Self {
Self(kind as u16) Self(kind as u32)
} }
} }
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
type Kind = SyntaxKind; type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16); assert!(raw.0 <= Root as u32);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
} }
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {

View file

@ -7,7 +7,7 @@ use cstree::{
}; };
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)] #[repr(u32)]
pub enum SyntaxKind { pub enum SyntaxKind {
/* Tokens */ /* Tokens */
Int, // 42 Int, // 42
@ -42,7 +42,7 @@ impl Language for Calculator {
} }
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16) RawSyntaxKind(kind as u32)
} }
fn static_text(kind: Self::Kind) -> Option<&'static str> { fn static_text(kind: Self::Kind) -> Option<&'static str> {

View file

@ -9,7 +9,7 @@
/// Let's start with defining all kinds of tokens and composite nodes. /// Let's start with defining all kinds of tokens and composite nodes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)] #[repr(u32)]
pub enum SyntaxKind { pub enum SyntaxKind {
LParen = 0, // '(' LParen = 0, // '('
RParen, // ')' RParen, // ')'
@ -32,7 +32,7 @@ use SyntaxKind::*;
/// First, to easily pass the enum variants into cstree via `.into()`: /// First, to easily pass the enum variants into cstree via `.into()`:
impl From<SyntaxKind> for cstree::RawSyntaxKind { impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self { fn from(kind: SyntaxKind) -> Self {
Self(kind as u16) Self(kind as u32)
} }
} }
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
type Kind = SyntaxKind; type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16); assert!(raw.0 <= Root as u32);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
} }
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
@ -421,7 +421,7 @@ nan
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE) /// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> { fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
fn tok(t: SyntaxKind) -> m_lexer::TokenKind { fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0) m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0 as u16)
} }
fn kind(t: m_lexer::TokenKind) -> SyntaxKind { fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
match t.0 { match t.0 {

View file

@ -58,12 +58,12 @@
//! //!
//! First, we need to list the different parts of our language's grammar. //! First, we need to list the different parts of our language's grammar.
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal. //! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct //! The `enum` needs to be convertible to a `u32`, so we use the `repr` attribute to ensure it uses the correct
//! representation. //! representation.
//! //!
//! ```rust,ignore //! ```rust,ignore
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] //! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
//! #[repr(u16)] //! #[repr(u32)]
//! enum SyntaxKind { //! enum SyntaxKind {
//! /* Tokens */ //! /* Tokens */
//! Int, // 42 //! Int, // 42
@ -112,7 +112,7 @@
//! } //! }
//! //!
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { //! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
//! RawSyntaxKind(kind as u16) //! RawSyntaxKind(kind as u32)
//! } //! }
//! //!
//! fn static_text(kind: Self::Kind) -> Option<&'static str> { //! fn static_text(kind: Self::Kind) -> Option<&'static str> {
@ -368,7 +368,7 @@ use std::fmt;
/// `RawSyntaxKind` is a type tag for each token or node. /// `RawSyntaxKind` is a type tag for each token or node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawSyntaxKind(pub u16); pub struct RawSyntaxKind(pub u32);
/// Typesafe representations of text ranges and sizes. /// Typesafe representations of text ranges and sizes.
pub mod text { pub mod text {
@ -423,7 +423,7 @@ pub mod sync {
/// ``` /// ```
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[allow(non_camel_case_types)] /// # #[allow(non_camel_case_types)]
/// #[repr(u16)] /// #[repr(u32)]
/// enum SyntaxKind { /// enum SyntaxKind {
/// Plus, // `+` /// Plus, // `+`
/// Minus, // `-` /// Minus, // `-`
@ -442,12 +442,12 @@ pub mod sync {
/// type Kind = SyntaxKind; /// type Kind = SyntaxKind;
/// ///
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { /// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
/// assert!(raw.0 <= __LAST as u16); /// assert!(raw.0 <= __LAST as u32);
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } /// unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
/// } /// }
/// ///
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { /// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
/// cstree::RawSyntaxKind(kind as u16) /// cstree::RawSyntaxKind(kind as u32)
/// } /// }
/// ///
/// fn static_text(kind: Self::Kind) -> Option<&'static str> { /// fn static_text(kind: Self::Kind) -> Option<&'static str> {
@ -489,7 +489,7 @@ pub mod testing {
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {} pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)] #[repr(u32)]
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub enum TestSyntaxKind { pub enum TestSyntaxKind {
Plus, Plus,
@ -510,12 +510,12 @@ pub mod testing {
type Kind = TestSyntaxKind; type Kind = TestSyntaxKind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind { fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= TestSyntaxKind::__LAST as u16); assert!(raw.0 <= TestSyntaxKind::__LAST as u32);
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) } unsafe { std::mem::transmute::<u32, TestSyntaxKind>(raw.0) }
} }
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16) RawSyntaxKind(kind as u32)
} }
fn static_text(kind: Self::Kind) -> Option<&'static str> { fn static_text(kind: Self::Kind) -> Option<&'static str> {

View file

@ -244,7 +244,7 @@ impl Serialize for RawSyntaxKind {
where where
S: serde::Serializer, S: serde::Serializer,
{ {
serializer.serialize_u16(self.0) serializer.serialize_u32(self.0)
} }
} }
@ -253,6 +253,6 @@ impl<'de> Deserialize<'de> for RawSyntaxKind {
where where
D: serde::Deserializer<'de>, D: serde::Deserializer<'de>,
{ {
Ok(Self(u16::deserialize(deserializer)?)) Ok(Self(u32::deserialize(deserializer)?))
} }
} }

View file

@ -61,7 +61,9 @@ mod tests {
use std::mem::size_of; use std::mem::size_of;
assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>()); assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>());
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>()); // verify niche opt of `NonNull` // verify niche opt of `NonNull`
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<*const u8>() + size_of::<u32>() * 2); assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>());
// parent + child index + text len
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<SyntaxNode<TestLang>>() + size_of::<u32>() * 2);
} }
} }

View file

@ -56,7 +56,7 @@ where
node node
} }
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16 pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u32) -> u32
where where
L: Language<Kind = RawSyntaxKind>, L: Language<Kind = RawSyntaxKind>,
I: Interner, I: Interner,

View file

@ -6,7 +6,7 @@ fn empty_tree_arc() {
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode}; use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)] #[repr(u32)]
enum SyntaxKind { enum SyntaxKind {
Root, Root,
} }
@ -17,12 +17,12 @@ fn empty_tree_arc() {
type Kind = SyntaxKind; type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind { fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::Root as u16); assert!(raw.0 <= SyntaxKind::Root as u32);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) } unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
} }
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind { fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
cstree::RawSyntaxKind(kind as u16) cstree::RawSyntaxKind(kind as u32)
} }
fn static_text(_kind: Self::Kind) -> Option<&'static str> { fn static_text(_kind: Self::Kind) -> Option<&'static str> {

View file

@ -26,7 +26,7 @@ macro_rules! event_tokens {
Token::BorrowedStr("Token"), Token::BorrowedStr("Token"),
Token::BorrowedStr("c"), Token::BorrowedStr("c"),
Token::Tuple { len: 2 }, Token::Tuple { len: 2 },
Token::U16($kind), Token::U32($kind),
Token::BorrowedStr($str), Token::BorrowedStr($str),
Token::TupleEnd, Token::TupleEnd,
Token::StructEnd, Token::StructEnd,
@ -40,7 +40,7 @@ macro_rules! event_tokens {
Token::BorrowedStr("EnterNode"), Token::BorrowedStr("EnterNode"),
Token::BorrowedStr("c"), Token::BorrowedStr("c"),
Token::Tuple { len: 2 }, Token::Tuple { len: 2 },
Token::U16($kind), Token::U32($kind),
Token::Bool($data), Token::Bool($data),
Token::TupleEnd, Token::TupleEnd,
Token::StructEnd, Token::StructEnd,