1
Fork 0
mirror of https://github.com/RGBCube/cstree synced 2025-07-27 09:07:44 +00:00

Use 32 bits for RawSyntaxKind internally (#49)

This commit is contained in:
DQ 2023-04-07 19:18:47 +02:00 committed by GitHub
parent 8750498b7a
commit 2aa543036f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 42 additions and 39 deletions

View file

@ -11,6 +11,7 @@
* Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements.
* Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option<SyntaxNode>` is now the same size as `SyntaxNode` itself: the size of a pointer).
* `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations.
* `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be adjusted to use `#[repr(u32)]`, and the corresponding conversions need to be updated to `u32`.
* The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. Otherwise, the module structure is now as follows:
* `cstree`
* `Language`

View file

@ -16,7 +16,7 @@ pub enum Element<'s> {
#[derive(Debug, Clone, Copy)]
pub enum TestKind {
Element { n: u16 },
Element { n: u32 },
Plus,
}
@ -45,7 +45,7 @@ impl<T: Bool> Language for TestLang<T> {
type Kind = TestKind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
if raw.0 == u16::MAX - 1 {
if raw.0 == u32::MAX - 1 {
TestKind::Plus
} else {
TestKind::Element { n: raw.0 }
@ -55,7 +55,7 @@ impl<T: Bool> Language for TestLang<T> {
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
match kind {
TestKind::Element { n } => RawSyntaxKind(n),
TestKind::Plus => RawSyntaxKind(u16::MAX - 1),
TestKind::Plus => RawSyntaxKind(u32::MAX - 1),
}
}
@ -85,8 +85,8 @@ where
pub fn build_recursive<T: Bool, I>(
root: &Element<'_>,
builder: &mut GreenNodeBuilder<'_, '_, TestLang<T>, I>,
mut from: u16,
) -> u16
mut from: u32,
) -> u32
where
I: Interner,
{

View file

@ -17,7 +17,7 @@ use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken};
use std::iter::Peekable;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[repr(u32)]
enum SyntaxKind {
Whitespace = 0,
@ -35,7 +35,7 @@ use SyntaxKind::*;
impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
Self(kind as u32)
}
}
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
assert!(raw.0 <= Root as u32);
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {

View file

@ -7,7 +7,7 @@ use cstree::{
};
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[repr(u32)]
pub enum SyntaxKind {
/* Tokens */
Int, // 42
@ -42,7 +42,7 @@ impl Language for Calculator {
}
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16)
RawSyntaxKind(kind as u32)
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {

View file

@ -9,7 +9,7 @@
/// Let's start with defining all kinds of tokens and composite nodes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[repr(u32)]
pub enum SyntaxKind {
LParen = 0, // '('
RParen, // ')'
@ -32,7 +32,7 @@ use SyntaxKind::*;
/// First, to easily pass the enum variants into cstree via `.into()`:
impl From<SyntaxKind> for cstree::RawSyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
Self(kind as u32)
}
}
@ -45,8 +45,8 @@ impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
assert!(raw.0 <= Root as u32);
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
@ -421,7 +421,7 @@ nan
/// Split the input string into a flat list of tokens (such as L_PAREN, WORD, and WHITESPACE)
fn lex(text: &str) -> VecDeque<(SyntaxKind, &str)> {
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0)
m_lexer::TokenKind(cstree::RawSyntaxKind::from(t).0 as u16)
}
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
match t.0 {

View file

@ -58,12 +58,12 @@
//!
//! First, we need to list the different parts of our language's grammar.
//! We can do that using an `enum` with a unit variant for any terminal and non-terminal.
//! The `enum` needs to be convertible to a `u16`, so we use the `repr` attribute to ensure it uses the correct
//! The `enum` needs to be convertible to a `u32`, so we use the `repr` attribute to ensure it uses the correct
//! representation.
//!
//! ```rust,ignore
//! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
//! #[repr(u16)]
//! #[repr(u32)]
//! enum SyntaxKind {
//! /* Tokens */
//! Int, // 42
@ -112,7 +112,7 @@
//! }
//!
//! fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
//! RawSyntaxKind(kind as u16)
//! RawSyntaxKind(kind as u32)
//! }
//!
//! fn static_text(kind: Self::Kind) -> Option<&'static str> {
@ -368,7 +368,7 @@ use std::fmt;
/// `RawSyntaxKind` is a type tag for each token or node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawSyntaxKind(pub u16);
pub struct RawSyntaxKind(pub u32);
/// Typesafe representations of text ranges and sizes.
pub mod text {
@ -423,7 +423,7 @@ pub mod sync {
/// ```
/// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// # #[allow(non_camel_case_types)]
/// #[repr(u16)]
/// #[repr(u32)]
/// enum SyntaxKind {
/// Plus, // `+`
/// Minus, // `-`
@ -442,12 +442,12 @@ pub mod sync {
/// type Kind = SyntaxKind;
///
/// fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
/// assert!(raw.0 <= __LAST as u16);
/// unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
/// assert!(raw.0 <= __LAST as u32);
/// unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
/// }
///
/// fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
/// cstree::RawSyntaxKind(kind as u16)
/// cstree::RawSyntaxKind(kind as u32)
/// }
///
/// fn static_text(kind: Self::Kind) -> Option<&'static str> {
@ -489,7 +489,7 @@ pub mod testing {
pub fn parse<L: Language, I>(_b: &mut GreenNodeBuilder<L, I>, _s: &str) {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[repr(u32)]
#[allow(non_camel_case_types)]
pub enum TestSyntaxKind {
Plus,
@ -510,12 +510,12 @@ pub mod testing {
type Kind = TestSyntaxKind;
fn kind_from_raw(raw: RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= TestSyntaxKind::__LAST as u16);
unsafe { std::mem::transmute::<u16, TestSyntaxKind>(raw.0) }
assert!(raw.0 <= TestSyntaxKind::__LAST as u32);
unsafe { std::mem::transmute::<u32, TestSyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> RawSyntaxKind {
RawSyntaxKind(kind as u16)
RawSyntaxKind(kind as u32)
}
fn static_text(kind: Self::Kind) -> Option<&'static str> {

View file

@ -244,7 +244,7 @@ impl Serialize for RawSyntaxKind {
where
S: serde::Serializer,
{
serializer.serialize_u16(self.0)
serializer.serialize_u32(self.0)
}
}
@ -253,6 +253,6 @@ impl<'de> Deserialize<'de> for RawSyntaxKind {
where
D: serde::Deserializer<'de>,
{
Ok(Self(u16::deserialize(deserializer)?))
Ok(Self(u32::deserialize(deserializer)?))
}
}

View file

@ -61,7 +61,9 @@ mod tests {
use std::mem::size_of;
assert_eq!(size_of::<SyntaxNode<TestLang>>(), size_of::<*const u8>());
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>()); // verify niche opt of `NonNull`
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<*const u8>() + size_of::<u32>() * 2);
// verify niche opt of `NonNull`
assert_eq!(size_of::<Option<SyntaxNode<TestLang>>>(), size_of::<*const u8>());
// parent + child index + text len
assert_eq!(size_of::<SyntaxToken<TestLang>>(), size_of::<SyntaxNode<TestLang>>() + size_of::<u32>() * 2);
}
}

View file

@ -56,7 +56,7 @@ where
node
}
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u16) -> u16
pub fn build_recursive<L, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'_, '_, L, I>, mut from: u32) -> u32
where
L: Language<Kind = RawSyntaxKind>,
I: Interner,

View file

@ -6,7 +6,7 @@ fn empty_tree_arc() {
use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode};
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
#[repr(u32)]
enum SyntaxKind {
Root,
}
@ -17,12 +17,12 @@ fn empty_tree_arc() {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::RawSyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::Root as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
assert!(raw.0 <= SyntaxKind::Root as u32);
unsafe { std::mem::transmute::<u32, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::RawSyntaxKind {
cstree::RawSyntaxKind(kind as u16)
cstree::RawSyntaxKind(kind as u32)
}
fn static_text(_kind: Self::Kind) -> Option<&'static str> {

View file

@ -26,7 +26,7 @@ macro_rules! event_tokens {
Token::BorrowedStr("Token"),
Token::BorrowedStr("c"),
Token::Tuple { len: 2 },
Token::U16($kind),
Token::U32($kind),
Token::BorrowedStr($str),
Token::TupleEnd,
Token::StructEnd,
@ -40,7 +40,7 @@ macro_rules! event_tokens {
Token::BorrowedStr("EnterNode"),
Token::BorrowedStr("c"),
Token::Tuple { len: 2 },
Token::U16($kind),
Token::U32($kind),
Token::Bool($data),
Token::TupleEnd,
Token::StructEnd,