From b3390aeea68a59e65226d7df878af0ab0626e96d Mon Sep 17 00:00:00 2001 From: Domenic Quirl Date: Wed, 23 Jun 2021 13:42:49 +0200 Subject: [PATCH 1/4] speed up green node building by removing copies --- Cargo.toml | 36 +++++++++++++++++++++--------------- src/green/builder.rs | 40 ++++++++++++++++++++++++---------------- src/green/node.rs | 26 +++----------------------- 3 files changed, 48 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6b82fef..736c570 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,32 +1,38 @@ [package] -edition = "2018" -name = "cstree" -version = "0.4.0" -authors = ["Domenic Quirl ", "Aleksey Kladov "] +edition = "2018" +name = "cstree" +version = "0.4.0" +authors = [ + "Domenic Quirl ", + "Aleksey Kladov ", +] description = "Library for generic lossless syntax trees" -license = "MIT OR Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/domenicquirl/cstree" -readme = "README.md" +readme = "README.md" + +[profile.release] +debug = true [dependencies] -lasso = "0.5" +lasso = { version="0.5", features=["inline-more"] } text-size = "1.0.0" -fxhash= "0.2.1" -parking_lot= "0.11.1" +fxhash = "0.2.1" +parking_lot = "0.11.1" # Arc triomphe = "0.1.2" [dependencies.serde] -version = "1.0" -optional = true +version = "1.0" +optional = true default-features = false -features = ["derive"] +features = ["derive"] [dev-dependencies] -m_lexer = "0.0.4" -serde_json = "1.0.61" -serde_test = "1.0.119" +m_lexer = "0.0.4" +serde_json = "1.0.61" +serde_test = "1.0.119" crossbeam-utils = "0.8" [features] diff --git a/src/green/builder.rs b/src/green/builder.rs index 973c3d4..a5d753a 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -1,6 +1,9 @@ -use std::convert::TryFrom; +use std::{ + convert::TryFrom, + hash::{Hash, Hasher}, +}; -use fxhash::FxHashMap; +use fxhash::{FxHashMap, FxHasher32}; use text_size::TextSize; use crate::{ @@ -132,11 +135,11 @@ where } impl ChildrenIter { - fn new(data: [Option; CHILDREN_CACHE_THRESHOLD], count: usize) -> Self { + fn new() -> Self { ChildrenIter { - data, - idx: 0, - len: count, + data: [None, None, None], + idx: 0, + len: 0, } } } @@ -157,19 +160,24 @@ where } } - let mut data: [Option; CHILDREN_CACHE_THRESHOLD] = [None, None, None]; - let mut count = 0; - - for child in children { - data[count] = Some(child); - count += 1; + let mut new_children = ChildrenIter::new(); + let mut hasher = FxHasher32::default(); + let mut text_len: TextSize = 0.into(); + for (i, child) in children.into_iter().enumerate() { + text_len += child.text_len(); + child.hash(&mut hasher); + new_children.data[i] = Some(child); + new_children.len += 1; } - let children = ChildrenIter::new(data, count); - let head = GreenNodeHead::from_child_iter(kind, children.clone()); + let head = GreenNodeHead { + kind, + text_len, + child_hash: hasher.finish() as u32, + }; self.nodes - .entry(head.clone()) - .or_insert_with(|| GreenNode::from_head_and_children(head, children)) + .entry(head) + .or_insert_with_key(|head| GreenNode::from_head_and_children(head.clone(), new_children)) .clone() } diff --git a/src/green/node.rs b/src/green/node.rs index 1eab4d4..e0b8464 100644 --- a/src/green/node.rs +++ b/src/green/node.rs @@ -15,29 +15,9 @@ use triomphe::{Arc, HeaderWithLength, ThinArc}; #[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(super) struct GreenNodeHead { - kind: SyntaxKind, - text_len: TextSize, - child_hash: u32, -} - -impl GreenNodeHead { - #[inline] - pub(super) fn from_child_iter(kind: SyntaxKind, children: I) -> Self - where - I: Iterator, - { - let mut hasher = FxHasher32::default(); - let mut text_len: TextSize = 0.into(); - for child in children { - text_len += child.text_len(); - child.hash(&mut hasher); - } - Self { - kind, - text_len, - child_hash: hasher.finish() as u32, - } - } + pub(super) kind: SyntaxKind, + pub(super) text_len: TextSize, + pub(super) child_hash: u32, } /// Internal node in the immutable "green" tree. From 87699336bb2c40b8f3a5e28d01e1833a1707741a Mon Sep 17 00:00:00 2001 From: Domenic Quirl Date: Wed, 23 Jun 2021 13:46:47 +0200 Subject: [PATCH 2/4] fix `Clone` impls for resolved elements --- src/syntax/resolved.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/syntax/resolved.rs b/src/syntax/resolved.rs index 548f0e3..35e7497 100644 --- a/src/syntax/resolved.rs +++ b/src/syntax/resolved.rs @@ -22,7 +22,6 @@ use crate::{ /// # See also /// [`SyntaxNode`] /// [`SyntaxNode::new_root_with_resolver`] -#[derive(Clone)] #[repr(transparent)] pub struct ResolvedNode { pub(super) syntax: SyntaxNode, @@ -41,6 +40,14 @@ impl ResolvedNode { } } +impl Clone for ResolvedNode { + fn clone(&self) -> Self { + Self { + syntax: self.syntax.clone(), + } + } +} + impl Deref for ResolvedNode { type Target = SyntaxNode; @@ -77,6 +84,14 @@ impl ResolvedToken { } } +impl Clone for ResolvedToken { + fn clone(&self) -> Self { + Self { + syntax: self.syntax.clone(), + } + } +} + impl Deref for ResolvedToken { type Target = SyntaxToken; From c539674fc7d54ce056d973b6148a797a746b3e9d Mon Sep 17 00:00:00 2001 From: Domenic Quirl Date: Wed, 23 Jun 2021 13:58:04 +0200 Subject: [PATCH 3/4] add tree construction benchmark --- Cargo.toml | 5 +++ benches/main.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 benches/main.rs diff --git a/Cargo.toml b/Cargo.toml index 736c570..6aa8610 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,11 @@ m_lexer = "0.0.4" serde_json = "1.0.61" serde_test = "1.0.119" crossbeam-utils = "0.8" +criterion = "0.3" + +[[bench]] +name = "main" +harness = false [features] default = [] diff --git a/benches/main.rs b/benches/main.rs new file mode 100644 index 0000000..18e1560 --- /dev/null +++ b/benches/main.rs @@ -0,0 +1,84 @@ +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; +use cstree::*; +use lasso::{Interner, Rodeo}; + +#[derive(Debug)] +pub enum Element<'s> { + Node(Vec>), + Token(&'s str), +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum TestLang {} +impl Language for TestLang { + type Kind = SyntaxKind; + + fn kind_from_raw(raw: SyntaxKind) -> Self::Kind { + raw + } + + fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { + kind + } +} + +pub fn build_tree_with_cache<'c, 'i, I>(root: &Element<'_>, cache: &'c mut NodeCache<'i, I>) -> GreenNode +where + I: Interner, +{ + let mut builder = GreenNodeBuilder::with_cache(cache); + build_recursive(root, &mut builder, 0); + let (node, interner) = builder.finish(); + assert!(interner.is_none()); + node +} + +pub fn build_recursive<'c, 'i, I>(root: &Element<'_>, builder: &mut GreenNodeBuilder<'c, 'i, I>, mut from: u16) -> u16 +where + I: Interner, +{ + match root { + Element::Node(children) => { + builder.start_node(SyntaxKind(from)); + for child in children { + from = build_recursive(child, builder, from + 1); + } + builder.finish_node(); + } + Element::Token(text) => { + builder.token(SyntaxKind(from), *text); + } + } + from +} + +fn two_level_tree() -> Element<'static> { + use Element::*; + Node(vec![ + Node(vec![Token("0.0"), Token("0.1")]), + Node(vec![Token("1.0")]), + Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]), + ]) +} + +pub fn create(c: &mut Criterion) { + let mut group = c.benchmark_group("qualification"); + group.throughput(Throughput::Elements(1)); + + let mut interner = Rodeo::new(); + let mut cache = NodeCache::with_interner(&mut interner); + let tree = two_level_tree(); + + group.bench_function("two-level tree", |b| { + b.iter(|| { + for _ in 0..100_000 { + let _tree = build_tree_with_cache(&tree, &mut cache); + } + }) + }); + + group.finish(); +} + +criterion_group!(benches, create); +criterion_main!(benches); From 7d71c7d150664fb209c27f212a0bc08f270c6add Mon Sep 17 00:00:00 2001 From: Domenic Quirl Date: Wed, 23 Jun 2021 14:17:43 +0200 Subject: [PATCH 4/4] slightly adjust green token caching --- src/green/builder.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/green/builder.rs b/src/green/builder.rs index a5d753a..81a35dd 100644 --- a/src/green/builder.rs +++ b/src/green/builder.rs @@ -185,7 +185,10 @@ where let text_len = TextSize::try_from(text.len()).unwrap(); let text = self.interner.get_or_intern(text); let data = GreenTokenData { kind, text, text_len }; - self.tokens.entry(data).or_insert_with(|| GreenToken::new(data)).clone() + self.tokens + .entry(data) + .or_insert_with_key(|data| GreenToken::new(*data)) + .clone() } }