mirror of https://github.com/RGBCube/cstree synced 2025-07-27 17:17:45 +00:00

initial commit

Domenic Quirl 2021-01-13 10:23:34 +01:00
commit ac4f659470
23 changed files with 5031 additions and 0 deletions

.gitignore vendored Normal file (4 lines)

@@ -0,0 +1,4 @@
.vscode
target
*checksum*

Cargo.lock generated Normal file (241 lines)

@@ -0,0 +1,241 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "ahash"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
[[package]]
name = "aho-corasick"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
dependencies = [
"memchr",
]
[[package]]
name = "byteorder"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cstree"
version = "0.0.2"
dependencies = [
"fxhash",
"lasso",
"m_lexer",
"parking_lot",
"serde",
"servo_arc",
"smallvec",
"text-size",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
dependencies = [
"ahash",
]
[[package]]
name = "instant"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
dependencies = [
"cfg-if",
]
[[package]]
name = "lasso"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3"
dependencies = [
"hashbrown",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929"
[[package]]
name = "lock_api"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312"
dependencies = [
"scopeguard",
]
[[package]]
name = "m_lexer"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7e51ebf91162d585a5bae05e4779efc4a276171cb880d61dd6fab11c98467a7"
dependencies = [
"regex",
]
[[package]]
name = "memchr"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
[[package]]
name = "nodrop"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "parking_lot"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ccb628cad4f84851442432c60ad8e1f607e29752d0bf072cbd0baf28aa34272"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "regex"
version = "1.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f6946991529684867e47d86474e3a6d0c0ab9b82d5821e314b1ede31fa3a4b3"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-syntax"
version = "0.6.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399"
[[package]]
name = "servo_arc"
version = "0.1.1"
dependencies = [
"nodrop",
"stable_deref_trait",
]
[[package]]
name = "smallvec"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "text-size"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f03e7efdedc3bc78cb2337f1e2785c39e45f5ef762d9e4ebb137fff7380a6d8a"
dependencies = [
"serde",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
dependencies = [
"lazy_static",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

Cargo.toml Normal file (23 lines)

@@ -0,0 +1,23 @@
[package]
edition = "2018"
name = "cstree"
version = "0.0.2"
authors = ["Domenic Quirl <DomenicQuirl@pm.me>", "Aleksey Kladov <aleksey.kladov@gmail.com>"]
description = "Library for generic lossless syntax trees"
license = "MIT OR Apache-2.0"
repository = "https://github.com/domenicquirl/cstree"
[dependencies]
serde = { version = "1.0.89", optional = true, default-features = false }
lasso = "0.4.1"
text-size = "1.0.0"
fxhash = "0.2.1"
smallvec = "1.6.1"
servo_arc = { path = "vendor/servo_arc" }
parking_lot = "0.11.1"
[dev-dependencies]
m_lexer = "0.0.4"
[features]
serde1 = ["serde", "text-size/serde"]

LICENSE-APACHE Normal file (201 lines)

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

LICENSE-MIT Normal file (23 lines)

@@ -0,0 +1,23 @@
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

README.md Normal file (21 lines)

@@ -0,0 +1,21 @@
# `cstree`
`cstree` is a library for creating and working with concrete syntax trees (CSTs).
The concept of CSTs is inspired in part by Swift's [libsyntax](https://github.com/apple/swift/tree/5e2c815edfd758f9b1309ce07bfc01c4bc20ec23/lib/Syntax).
The `cstree` implementation is a fork of the excellent [`rowan`](https://github.com/rust-analyzer/rowan/), developed by the authors of [rust-analyzer](https://github.com/rust-analyzer/rust-analyzer/).
While we are building our own documentation, a conceptual overview of their implementation is available in the [rust-analyzer repo](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#trees).
Notable differences of `cstree` compared to `rowan`:
- Syntax trees (red trees) are created lazily, but are persistent. Once a node has been created, it will remain allocated, while `rowan` re-creates the red layer on the fly. Apart from the trade-off discussed [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#memoized-rednodes), this helps to achieve good tree traversal speed while enabling the points below:
- Syntax (red) nodes are `Send` and `Sync`, allowing realized trees to be shared across threads. This is achieved by atomically reference counting syntax trees as a whole, which also removes the need to reference count individual nodes (helping with the point above).
- Syntax nodes can hold custom data.
- `cstree` trees are trees over interned strings. This means `cstree` will deduplicate the text of tokens such as identifiers with the same name. In the same position, `rowan` stores each string individually, with a small string optimization (see [`SmolStr`](https://crates.io/crates/smol_str)).
- Performance optimizations for tree creation: new nodes are only allocated on the heap if they are not in the cache, and subtrees are not hashed recursively.
See `examples/s_expressions.rs` for a tutorial, and the sketch below for a quick look at the builder API.
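A minimal sketch of building and querying a tree. This is hedged: `ROOT`, `NUMBER`, and the `SyntaxNode` alias are the definitions from `examples/math.rs`, not part of the library itself.

```rust
use cstree::{
    interning::{Reader, Resolver},
    GreenNodeBuilder, NodeOrToken,
};

fn main() {
    let mut builder = GreenNodeBuilder::new();
    builder.start_node(ROOT.into()); // open the root node
    builder.token(NUMBER.into(), "92"); // add a leaf token; its text is interned
    builder.finish_node(); // close the root node

    // With an owned node cache, `finish` also hands back the interner.
    let (green, interner) = builder.finish();
    let resolver = interner.unwrap().into_resolver();
    let root = SyntaxNode::new_root(green);

    // Token text lives in the interner, not in the tree itself.
    match root.children_with_tokens().next().unwrap() {
        NodeOrToken::Token(token) => assert_eq!(token.text(&resolver), "92"),
        NodeOrToken::Node(_) => unreachable!(),
    }
}
```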
## License
`cstree` is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0).
See `LICENSE-APACHE` and `LICENSE-MIT` for details.

examples/math.rs Normal file (165 lines)

@@ -0,0 +1,165 @@
//! Example that takes the input
//! 1 + 2 * 3 + 4
//! and builds the tree
//! - Marker(Root)
//!   - Marker(Operation)
//!     - Marker(Operation)
//!       - "1" Token(Number)
//!       - "+" Token(Add)
//!       - Marker(Operation)
//!         - "2" Token(Number)
//!         - "*" Token(Mul)
//!         - "3" Token(Number)
//!     - "+" Token(Add)
//!     - "4" Token(Number)
use cstree::{
interning::{Reader, Resolver},
GreenNodeBuilder, NodeOrToken,
};
use std::iter::Peekable;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)]
#[repr(u16)]
enum SyntaxKind {
WHITESPACE = 0,
ADD,
SUB,
MUL,
DIV,
NUMBER,
ERROR,
OPERATION,
ROOT,
}
use SyntaxKind::*;
impl From<SyntaxKind> for cstree::SyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Lang {}
impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
kind.into()
}
}
type SyntaxNode = cstree::SyntaxNode<Lang>;
#[allow(unused)]
type SyntaxToken = cstree::SyntaxToken<Lang>;
#[allow(unused)]
type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>;
struct Parser<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> {
builder: GreenNodeBuilder<'static>,
iter: Peekable<I>,
}
impl<'input, I: Iterator<Item = (SyntaxKind, &'input str)>> Parser<'input, I> {
fn peek(&mut self) -> Option<SyntaxKind> {
while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) {
self.bump();
}
self.iter.peek().map(|&(t, _)| t)
}
fn bump(&mut self) {
if let Some((token, string)) = self.iter.next() {
self.builder.token(token.into(), string);
}
}
fn parse_val(&mut self) {
match self.peek() {
Some(NUMBER) => self.bump(),
_ => {
self.builder.start_node(ERROR.into());
self.bump();
self.builder.finish_node();
}
}
}
fn handle_operation(&mut self, tokens: &[SyntaxKind], next: fn(&mut Self)) {
let checkpoint = self.builder.checkpoint();
next(self);
while self.peek().map(|t| tokens.contains(&t)).unwrap_or(false) {
self.builder.start_node_at(checkpoint, OPERATION.into());
self.bump();
next(self);
self.builder.finish_node();
}
}
fn parse_mul(&mut self) {
self.handle_operation(&[MUL, DIV], Self::parse_val)
}
fn parse_add(&mut self) {
self.handle_operation(&[ADD, SUB], Self::parse_mul)
}
fn parse(mut self) -> (SyntaxNode, impl Resolver) {
self.builder.start_node(ROOT.into());
self.parse_add();
self.builder.finish_node();
let (tree, resolver) = self.builder.finish();
(SyntaxNode::new_root(tree), resolver.unwrap().into_resolver())
}
}
fn print(indent: usize, element: SyntaxElementRef<'_>, resolver: &impl Resolver) {
let kind: SyntaxKind = element.kind().into();
print!("{:indent$}", "", indent = indent);
match element {
NodeOrToken::Node(node) => {
println!("- {:?}", kind);
for child in node.children_with_tokens() {
print(indent + 2, child, resolver);
}
}
NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(resolver), kind),
}
}
fn main() {
let (ast, resolver) = Parser {
builder: GreenNodeBuilder::new(),
iter: vec![
// 1 + 2 * 3 + 4
(NUMBER, "1".into()),
(WHITESPACE, " ".into()),
(ADD, "+".into()),
(WHITESPACE, " ".into()),
(NUMBER, "2".into()),
(WHITESPACE, " ".into()),
(MUL, "*".into()),
(WHITESPACE, " ".into()),
(NUMBER, "3".into()),
(WHITESPACE, " ".into()),
(ADD, "+".into()),
(WHITESPACE, " ".into()),
(NUMBER, "4".into()),
]
.into_iter()
.peekable(),
}
.parse();
print(0, (&ast).into(), &resolver);
}

examples/s_expressions.rs Normal file (456 lines)

@@ -0,0 +1,456 @@
//! In this tutorial, we will write a parser
//! and evaluator of arithmetic S-expressions,
//! which look like this:
//! ```
//! (+ (* 15 2) 62)
//! ```
//!
//! It's suggested to read the conceptual overview of the design
//! alongside this tutorial:
//! https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md
/// cstree uses the `TextSize` and `TextRange` types to
/// represent UTF-8 offsets and ranges.
/// Let's start with defining all kinds of tokens and
/// composite nodes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)]
#[repr(u16)]
enum SyntaxKind {
L_PAREN = 0, // '('
R_PAREN, // ')'
WORD, // '+', '15'
WHITESPACE, // whitespace is explicit
ERROR, // as well as errors
// composite nodes
LIST, // `(+ 2 3)`
ATOM, // `+`, `15`, wraps a WORD token
ROOT, // top-level node: a list of s-expressions
}
use SyntaxKind::*;
/// Some boilerplate is needed, as cstree settled on using its own
/// `struct SyntaxKind(u16)` internally, instead of accepting the
/// user's `enum SyntaxKind` as a type parameter.
///
/// First, to easily pass the enum variants into cstree via `.into()`:
impl From<SyntaxKind> for cstree::SyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
}
/// Second, implementing the `Language` trait teaches cstree to convert between
/// these two SyntaxKind types, allowing for a nicer SyntaxNode API where
/// "kinds" are values from our `enum SyntaxKind`, instead of plain u16 values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Lang {}
impl cstree::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
kind.into()
}
}
/// GreenNode is an immutable tree, which is cheap to change,
/// but doesn't contain offsets and parent pointers.
use cstree::{
interning::{Reader, Resolver},
GreenNode,
};
/// You can construct GreenNodes by hand, but a builder
/// is helpful for top-down parsers: it maintains a stack
/// of currently in-progress nodes
use cstree::GreenNodeBuilder;
/// The parse results are stored as a "green tree".
/// We'll discuss working with the results later.
struct Parse<I> {
green_node: GreenNode,
resolver: I,
#[allow(unused)]
errors: Vec<String>,
}
/// Now, let's write a parser.
/// Note that `parse` does not return a `Result`:
/// by design, a syntax tree can be built even for
/// completely invalid source code.
fn parse(text: &str) -> Parse<impl Resolver> {
struct Parser<'input> {
/// input tokens, including whitespace,
/// in *reverse* order.
tokens: Vec<(SyntaxKind, &'input str)>,
/// the in-progress tree.
builder: GreenNodeBuilder<'static>,
/// the list of syntax errors we've accumulated
/// so far.
errors: Vec<String>,
}
/// The outcome of parsing a single S-expression
enum SexpRes {
/// An S-expression (i.e. an atom, or a list) was successfully parsed
Ok,
/// Nothing was parsed, as no significant tokens remained
Eof,
/// An unexpected ')' was found
RParen,
}
impl Parser<'_> {
fn parse(mut self) -> Parse<impl Resolver> {
// Make sure that the root node covers all source
self.builder.start_node(ROOT.into());
// Parse zero or more S-expressions
loop {
match self.sexp() {
SexpRes::Eof => break,
SexpRes::RParen => {
self.builder.start_node(ERROR.into());
self.errors.push("unmatched `)`".to_string());
self.bump(); // be sure to chug along in case of error
self.builder.finish_node();
}
SexpRes::Ok => (),
}
}
// Don't forget to eat *trailing* whitespace
self.skip_ws();
// Close the root node.
self.builder.finish_node();
// Turn the builder into a GreenNode
let (tree, resolver) = self.builder.finish();
Parse {
green_node: tree,
resolver: resolver.unwrap().into_resolver(),
errors: self.errors,
}
}
fn list(&mut self) {
assert_eq!(self.current(), Some(L_PAREN));
// Start the list node
self.builder.start_node(LIST.into());
self.bump(); // '('
loop {
match self.sexp() {
SexpRes::Eof => {
self.errors.push("expected `)`".to_string());
break;
}
SexpRes::RParen => {
self.bump();
break;
}
SexpRes::Ok => (),
}
}
// close the list node
self.builder.finish_node();
}
fn sexp(&mut self) -> SexpRes {
// Eat leading whitespace
self.skip_ws();
// Either a list, an atom, a closing paren,
// or an eof.
let t = match self.current() {
None => return SexpRes::Eof,
Some(R_PAREN) => return SexpRes::RParen,
Some(t) => t,
};
match t {
L_PAREN => self.list(),
WORD => {
self.builder.start_node(ATOM.into());
self.bump();
self.builder.finish_node();
}
ERROR => self.bump(),
_ => unreachable!(),
}
SexpRes::Ok
}
/// Advance one token, adding it to the current branch of the tree builder.
fn bump(&mut self) {
let (kind, text) = self.tokens.pop().unwrap();
self.builder.token(kind.into(), text);
}
/// Peek at the first unprocessed token
fn current(&self) -> Option<SyntaxKind> {
self.tokens.last().map(|(kind, _)| *kind)
}
fn skip_ws(&mut self) {
while self.current() == Some(WHITESPACE) {
self.bump()
}
}
}
let mut tokens = lex(text);
tokens.reverse();
Parser {
tokens,
builder: GreenNodeBuilder::new(),
errors: Vec::new(),
}
.parse()
}
/// To work with the parse results we need a view into the
/// green tree - the Syntax tree.
/// It is also immutable, like a GreenNode,
/// but it contains parent pointers, offsets, and
/// has identity semantics.
type SyntaxNode = cstree::SyntaxNode<Lang>;
#[allow(unused)]
type SyntaxToken = cstree::SyntaxToken<Lang>;
#[allow(unused)]
type SyntaxElement = cstree::NodeOrToken<SyntaxNode, SyntaxToken>;
impl<I> Parse<I> {
fn syntax(&self) -> SyntaxNode {
SyntaxNode::new_root(self.green_node.clone())
}
}
/// Let's check that the parser works as expected
#[test]
fn test_parser() {
let text = "(+ (* 15 2) 62)";
let parse = parse(text);
let node = parse.syntax();
let resolver = &parse.resolver;
assert_eq!(
node.debug(resolver, false),
"ROOT@0..15", // root node, spanning 15 bytes
);
assert_eq!(node.children().count(), 1);
let list = node.children().next().unwrap();
let children = list
.children_with_tokens()
.map(|child| format!("{:?}@{:?}", child.kind(), child.text_range()))
.collect::<Vec<_>>();
assert_eq!(
children,
vec![
"L_PAREN@0..1".to_string(),
"ATOM@1..2".to_string(),
"WHITESPACE@2..3".to_string(), // note, explicit whitespace!
"LIST@3..11".to_string(),
"WHITESPACE@11..12".to_string(),
"ATOM@12..14".to_string(),
"R_PAREN@14..15".to_string(),
]
);
}
/// So far, we've been working with a homogeneous untyped tree.
/// It's nice to provide generic tree operations, like traversals,
/// but it's a bad fit for semantic analysis.
/// This crate itself does not provide AST facilities directly,
/// but it is possible to layer AST on top of `SyntaxNode` API.
/// Let's write a function to evaluate S-expressions.
///
/// For that, let's define AST nodes.
/// It'll be quite a bunch of repetitive code, so we'll use a macro.
///
/// For a real language, you'd want to generate an AST. I find a
/// combination of `serde`, `ron` and `tera` crates invaluable for that!
macro_rules! ast_node {
($ast:ident, $kind:ident) => {
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
struct $ast(SyntaxNode);
impl $ast {
#[allow(unused)]
fn cast(node: SyntaxNode) -> Option<Self> {
if node.kind() == $kind {
Some(Self(node))
} else {
None
}
}
}
};
}
ast_node!(Root, ROOT);
ast_node!(Atom, ATOM);
ast_node!(List, LIST);
// Sexp is slightly different, so let's do it by hand.
#[derive(PartialEq, Eq, Hash)]
#[repr(transparent)]
struct Sexp(SyntaxNode);
enum SexpKind {
Atom(Atom),
List(List),
}
impl Sexp {
fn cast(node: SyntaxNode) -> Option<Self> {
if Atom::cast(node.clone()).is_some() || List::cast(node.clone()).is_some() {
Some(Sexp(node))
} else {
None
}
}
fn kind(&self) -> SexpKind {
Atom::cast(self.0.clone())
.map(SexpKind::Atom)
.or_else(|| List::cast(self.0.clone()).map(SexpKind::List))
.unwrap()
}
}
// Let's enhance AST nodes with ancillary functions and
// eval.
impl Root {
fn sexps(&self) -> impl Iterator<Item = Sexp> + '_ {
self.0.children().cloned().filter_map(Sexp::cast)
}
}
enum Op {
Add,
Sub,
Div,
Mul,
}
impl Atom {
fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
self.text(resolver).parse().ok()
}
fn as_op(&self, resolver: &impl Resolver) -> Option<Op> {
let op = match self.text(resolver) {
"+" => Op::Add,
"-" => Op::Sub,
"*" => Op::Mul,
"/" => Op::Div,
_ => return None,
};
Some(op)
}
fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str {
match &self.0.green().children().next() {
Some(cstree::NodeOrToken::Token(token)) => token.text(resolver),
_ => unreachable!(),
}
}
}
impl List {
fn sexps(&self) -> impl Iterator<Item = Sexp> + '_ {
self.0.children().cloned().filter_map(Sexp::cast)
}
fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
let op = match self.sexps().nth(0)?.kind() {
SexpKind::Atom(atom) => atom.as_op(resolver)?,
_ => return None,
};
let arg1 = self.sexps().nth(1)?.eval(resolver)?;
let arg2 = self.sexps().nth(2)?.eval(resolver)?;
let res = match op {
Op::Add => arg1 + arg2,
Op::Sub => arg1 - arg2,
Op::Mul => arg1 * arg2,
Op::Div if arg2 == 0 => return None,
Op::Div => arg1 / arg2,
};
Some(res)
}
}
impl Sexp {
fn eval(&self, resolver: &impl Resolver) -> Option<i64> {
match self.kind() {
SexpKind::Atom(atom) => atom.eval(resolver),
SexpKind::List(list) => list.eval(resolver),
}
}
}
impl<I> Parse<I> {
fn root(&self) -> Root {
Root::cast(self.syntax()).unwrap()
}
}
/// Let's test the eval!
fn main() {
let sexps = "
92
(+ 62 30)
(/ 92 0)
nan
(+ (* 15 2) 62)
";
let parse = parse(sexps);
let root = parse.root();
let resolver = &parse.resolver;
let res = root.sexps().map(|it| it.eval(resolver)).collect::<Vec<_>>();
eprintln!("{:?}", res);
assert_eq!(res, vec![Some(92), Some(92), None, None, Some(92),])
}
/// Split the input string into a flat list of tokens
/// (such as L_PAREN, WORD, and WHITESPACE)
fn lex(text: &str) -> Vec<(SyntaxKind, &str)> {
fn tok(t: SyntaxKind) -> m_lexer::TokenKind {
m_lexer::TokenKind(cstree::SyntaxKind::from(t).0)
}
fn kind(t: m_lexer::TokenKind) -> SyntaxKind {
match t.0 {
0 => L_PAREN,
1 => R_PAREN,
2 => WORD,
3 => WHITESPACE,
4 => ERROR,
_ => unreachable!(),
}
}
let lexer = m_lexer::LexerBuilder::new()
.error_token(tok(ERROR))
.tokens(&[
(tok(L_PAREN), r"\("),
(tok(R_PAREN), r"\)"),
(tok(WORD), r"[^\s()]+"),
(tok(WHITESPACE), r"\s+"),
])
.build();
lexer
.tokenize(text)
.into_iter()
.map(|t| (t.len, kind(t.kind)))
.scan(0usize, |start_offset, (len, kind)| {
let s = &text[*start_offset..*start_offset + len];
*start_offset += len;
Some((kind, s))
})
.collect()
}

rustfmt.toml Normal file (21 lines)

@@ -0,0 +1,21 @@
unstable_features = true
edition = "2018"
max_width = 120
comment_width = 120
wrap_comments = true
format_code_in_doc_comments = true
format_macro_matchers = true
merge_imports = true
reorder_impl_items = true
use_field_init_shorthand = true
# should be 1, but as of writing is too unstable and introduces blank lines at the start of random blocks
blank_lines_lower_bound = 0
struct_field_align_threshold = 8

src/green.rs Normal file (41 lines)

@@ -0,0 +1,41 @@
mod node;
mod token;
mod element;
mod builder;
pub(crate) use self::element::GreenElementRef;
use self::element::{GreenElement, PackedGreenElement};
pub use self::{
builder::{Checkpoint, GreenNodeBuilder, NodeCache},
node::{Children, GreenNode},
token::GreenToken,
};
/// SyntaxKind is a type tag for each token or node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SyntaxKind(pub u16);
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn assert_send_sync() {
fn f<T: Send + Sync>() {}
f::<GreenNode>();
f::<GreenToken>();
f::<GreenElement>();
f::<PackedGreenElement>();
}
#[test]
fn test_size_of() {
use std::mem::size_of;
eprintln!("GreenNode {}", size_of::<GreenNode>());
eprintln!("GreenToken {}", size_of::<GreenToken>());
eprintln!("GreenElement {}", size_of::<GreenElement>());
eprintln!("PackedGreenElement {}", size_of::<PackedGreenElement>());
}
}

src/green/builder.rs Normal file (225 lines)

@@ -0,0 +1,225 @@
use std::{convert::TryFrom, num::NonZeroUsize};
use fxhash::{FxBuildHasher, FxHashMap};
use lasso::{Capacity, Rodeo, Spur};
use smallvec::SmallVec;
use text_size::TextSize;
use crate::{
green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
interning::Interner,
NodeOrToken,
};
use super::{node::GreenNodeHead, token::GreenTokenData};
#[derive(Debug)]
pub struct NodeCache {
nodes: FxHashMap<GreenNodeHead, GreenNode>,
tokens: FxHashMap<GreenTokenData, GreenToken>,
interner: Rodeo<Spur, FxBuildHasher>,
}
impl NodeCache {
pub fn new() -> Self {
Self {
nodes: FxHashMap::default(),
tokens: FxHashMap::default(),
interner: Rodeo::with_capacity_and_hasher(
// capacity values suggested by author of `lasso`
Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
FxBuildHasher::default(),
),
}
}
fn node<I>(&mut self, kind: SyntaxKind, children: I) -> GreenNode
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
let children = children.into_iter();
// Green nodes are fully immutable, so it's ok to deduplicate them.
// This is the same optimization that Roslyn does
// https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
//
// For example, all `#[inline]` in this file share the same green node!
// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
// 17% of the memory for green nodes!
if children.len() <= 3 {
let children: SmallVec<[_; 3]> = children.collect();
let head = GreenNodeHead::from_child_slice(kind, children.as_ref());
self.nodes
.entry(head.clone())
.or_insert_with(|| GreenNode::from_head_and_children(head, children))
.clone()
} else {
GreenNode::new(kind, children)
}
}
fn token(&mut self, kind: SyntaxKind, text: &str) -> GreenToken {
let text_len = TextSize::try_from(text.len()).unwrap();
let text = self.interner.get_or_intern(text);
let data = GreenTokenData { kind, text, text_len };
self.tokens
.entry(data.clone())
.or_insert_with(|| GreenToken::new(data))
.clone()
}
}
#[derive(Debug)]
enum MaybeOwned<'a, T> {
Owned(T),
Borrowed(&'a mut T),
}
impl<T> std::ops::Deref for MaybeOwned<'_, T> {
type Target = T;
fn deref(&self) -> &T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
}
}
}
impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
fn deref_mut(&mut self) -> &mut T {
match self {
MaybeOwned::Owned(it) => it,
MaybeOwned::Borrowed(it) => *it,
}
}
}
impl<T: Default> Default for MaybeOwned<'_, T> {
fn default() -> Self {
MaybeOwned::Owned(T::default())
}
}
/// A checkpoint for maybe wrapping a node. See `GreenNodeBuilder::checkpoint` for details.
#[derive(Clone, Copy, Debug)]
pub struct Checkpoint(usize);
/// A builder for a green tree.
#[derive(Debug)]
pub struct GreenNodeBuilder<'cache> {
cache: MaybeOwned<'cache, NodeCache>,
parents: Vec<(SyntaxKind, usize)>,
children: Vec<GreenElement>,
}
impl GreenNodeBuilder<'_> {
/// Creates a new builder.
pub fn new() -> GreenNodeBuilder<'static> {
GreenNodeBuilder {
cache: MaybeOwned::Owned(NodeCache::new()),
parents: Vec::with_capacity(8),
children: Vec::with_capacity(8),
}
}
/// Reusing a `NodeCache` between different `GreenNodeBuilder`s saves memory.
/// It allows the underlying trees to be structurally shared.
pub fn with_cache(cache: &mut NodeCache) -> GreenNodeBuilder<'_> {
GreenNodeBuilder {
cache: MaybeOwned::Borrowed(cache),
parents: Vec::with_capacity(8),
children: Vec::with_capacity(8),
}
}
/// Adds a new token to the current branch.
#[inline]
pub fn token(&mut self, kind: SyntaxKind, text: &str) {
let token = self.cache.token(kind, text);
self.children.push(token.into());
}
/// Starts a new node and makes it current.
#[inline]
pub fn start_node(&mut self, kind: SyntaxKind) {
let len = self.children.len();
self.parents.push((kind, len));
}
/// Finishes the current branch and restores the previous
/// branch as current.
#[inline]
pub fn finish_node(&mut self) {
let (kind, first_child) = self.parents.pop().unwrap();
let children = self.children.drain(first_child..);
let node = self.cache.node(kind, children);
self.children.push(node.into());
}
/// Prepare for maybe wrapping the next node.
/// The way wrapping works is that you first get a checkpoint, then
/// place all the tokens you want to wrap, and only then *maybe* call
/// `start_node_at`.
/// Example:
/// ```rust
/// # use cstree::{GreenNodeBuilder, SyntaxKind};
/// # const PLUS: SyntaxKind = SyntaxKind(0);
/// # const OPERATION: SyntaxKind = SyntaxKind(1);
/// # struct Parser;
/// # impl Parser {
/// # fn peek(&self) -> Option<SyntaxKind> { None }
/// # fn parse_expr(&mut self) {}
/// # }
/// # let mut builder = GreenNodeBuilder::new();
/// # let mut parser = Parser;
/// let checkpoint = builder.checkpoint();
/// parser.parse_expr();
/// if parser.peek() == Some(PLUS) {
/// // 1 + 2 = Add(1, 2)
/// builder.start_node_at(checkpoint, OPERATION);
/// parser.parse_expr();
/// builder.finish_node();
/// }
/// ```
#[inline]
pub fn checkpoint(&self) -> Checkpoint {
Checkpoint(self.children.len())
}
/// Wrap the previous branch marked by `checkpoint` in a new branch and
/// make it current.
#[inline]
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
let Checkpoint(checkpoint) = checkpoint;
assert!(
checkpoint <= self.children.len(),
"checkpoint no longer valid, was finish_node called early?"
);
if let Some(&(_, first_child)) = self.parents.last() {
assert!(
checkpoint >= first_child,
"checkpoint no longer valid, was an unmatched start_node_at called?"
);
}
self.parents.push((kind, checkpoint));
}
/// Completes tree building. Make sure that calls to `start_node` /
/// `start_node_at` and `finish_node` are paired!
#[inline]
pub fn finish(mut self) -> (GreenNode, Option<impl Interner<Spur>>) {
assert_eq!(self.children.len(), 1);
let resolver = match self.cache {
MaybeOwned::Owned(cache) => Some(cache.interner),
MaybeOwned::Borrowed(_) => None,
};
match self.children.pop().unwrap() {
NodeOrToken::Node(node) => (node, resolver),
NodeOrToken::Token(_) => panic!(),
}
}
}
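`with_cache` is what makes the deduplication in `NodeCache::node` and `NodeCache::token` pay off across multiple parses. Below is a hedged sketch of reusing one cache for two builders; note that this commit's `lib.rs` does not yet re-export `NodeCache` from the crate root, so the import assumes such a re-export, and `WORD` is a hypothetical kind value for illustration.

```rust
use cstree::{GreenNodeBuilder, NodeCache, SyntaxKind};

// Hypothetical kind value, just for illustration.
const WORD: SyntaxKind = SyntaxKind(0);

fn main() {
    let mut cache = NodeCache::new();
    let mut build = |text: &str| {
        // Both builders borrow the same cache instead of owning one.
        let mut builder = GreenNodeBuilder::with_cache(&mut cache);
        builder.start_node(WORD);
        builder.token(WORD, text);
        builder.finish_node();
        // With a borrowed cache, `finish` returns `None` for the interner;
        // the interned strings stay inside the shared `NodeCache`.
        let (green, interner) = builder.finish();
        assert!(interner.is_none());
        green
    };
    let a = build("shared");
    let b = build("shared");
    // Identical small subtrees come back as clones of one cached green node.
    assert_eq!(a, b);
}
```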

src/green/element.rs Normal file (212 lines)

@@ -0,0 +1,212 @@
use std::{fmt, hash, mem};
// NOTE: From `thin_dst`:
// This MUST be size=1 such that pointer math actually advances the pointer.
type ErasedPtr = *const u8;
use crate::{
green::{GreenNode, GreenToken, SyntaxKind},
NodeOrToken, TextSize,
};
pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
pub(crate) type GreenElementRef<'a> = NodeOrToken<&'a GreenNode, &'a GreenToken>;
#[repr(transparent)]
pub(super) struct PackedGreenElement {
ptr: ErasedPtr,
}
impl From<GreenNode> for GreenElement {
#[inline]
fn from(node: GreenNode) -> GreenElement {
NodeOrToken::Node(node)
}
}
impl<'a> From<&'a GreenNode> for GreenElementRef<'a> {
#[inline]
fn from(node: &'a GreenNode) -> GreenElementRef<'a> {
NodeOrToken::Node(node)
}
}
impl From<GreenNode> for PackedGreenElement {
#[inline]
fn from(node: GreenNode) -> PackedGreenElement {
unsafe { mem::transmute(node) }
}
}
impl From<GreenToken> for GreenElement {
#[inline]
fn from(token: GreenToken) -> GreenElement {
NodeOrToken::Token(token)
}
}
impl<'a> From<&'a GreenToken> for GreenElementRef<'a> {
#[inline]
fn from(token: &'a GreenToken) -> GreenElementRef<'a> {
NodeOrToken::Token(token)
}
}
impl From<GreenToken> for PackedGreenElement {
#[inline]
fn from(token: GreenToken) -> PackedGreenElement {
unsafe { mem::transmute(token) }
}
}
impl GreenElement {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> SyntaxKind {
self.as_ref().kind()
}
/// Returns the length of the text covered by this element.
#[inline]
pub fn text_len(&self) -> TextSize {
self.as_ref().text_len()
}
}
impl GreenElementRef<'_> {
/// Returns kind of this element.
#[inline]
pub fn kind(&self) -> SyntaxKind {
match self {
NodeOrToken::Node(it) => it.kind(),
NodeOrToken::Token(it) => it.kind(),
}
}
/// Returns the length of the text covered by this element.
#[inline]
pub fn text_len(self) -> TextSize {
match self {
NodeOrToken::Node(it) => it.text_len(),
NodeOrToken::Token(it) => it.text_len(),
}
}
}
impl From<GreenElement> for PackedGreenElement {
fn from(element: GreenElement) -> Self {
match element {
NodeOrToken::Node(node) => node.into(),
NodeOrToken::Token(token) => token.into(),
}
}
}
impl From<PackedGreenElement> for GreenElement {
fn from(element: PackedGreenElement) -> Self {
if element.is_node() {
NodeOrToken::Node(element.into_node().unwrap())
} else {
NodeOrToken::Token(element.into_token().unwrap())
}
}
}
impl PackedGreenElement {
fn is_node(&self) -> bool {
self.ptr as usize & 1 == 0
}
pub(crate) fn as_node(&self) -> Option<&GreenNode> {
if self.is_node() {
unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenNode)) }
} else {
None
}
}
pub(crate) fn into_node(self) -> Option<GreenNode> {
if self.is_node() {
unsafe { Some(mem::transmute(self)) }
} else {
None
}
}
pub(crate) fn as_token(&self) -> Option<&GreenToken> {
if !self.is_node() {
unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenToken)) }
} else {
None
}
}
pub(crate) fn into_token(self) -> Option<GreenToken> {
if !self.is_node() {
unsafe { Some(mem::transmute(self)) }
} else {
None
}
}
pub(crate) fn as_ref(&self) -> GreenElementRef<'_> {
if self.is_node() {
NodeOrToken::Node(self.as_node().unwrap())
} else {
NodeOrToken::Token(self.as_token().unwrap())
}
}
}
impl fmt::Debug for PackedGreenElement {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.is_node() {
self.as_node().unwrap().fmt(f)
} else {
self.as_token().unwrap().fmt(f)
}
}
}
impl Eq for PackedGreenElement {}
impl PartialEq for PackedGreenElement {
fn eq(&self, other: &Self) -> bool {
self.as_node() == other.as_node() && self.as_token() == other.as_token()
}
}
impl hash::Hash for PackedGreenElement {
fn hash<H>(&self, state: &mut H)
where
H: hash::Hasher,
{
if self.is_node() {
self.as_node().unwrap().hash(state)
} else {
self.as_token().unwrap().hash(state)
}
}
}
impl Drop for PackedGreenElement {
fn drop(&mut self) {
if self.is_node() {
PackedGreenElement { ptr: self.ptr }.into_node();
} else {
PackedGreenElement { ptr: self.ptr }.into_token();
}
}
}
unsafe impl Send for PackedGreenElement
where
GreenToken: Send,
GreenNode: Send,
{
}
unsafe impl Sync for PackedGreenElement
where
GreenToken: Sync,
GreenNode: Sync,
{
}

src/green/node.rs Normal file (220 lines)

@@ -0,0 +1,220 @@
use std::{
hash::{Hash, Hasher},
iter::FusedIterator,
slice,
};
use fxhash::FxHasher32;
use servo_arc::{Arc, HeaderSlice, HeaderWithLength, ThinArc};
use crate::{
green::{GreenElement, GreenElementRef, PackedGreenElement, SyntaxKind},
TextSize,
};
#[repr(align(2))] // NB: this is an at-least annotation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(super) struct GreenNodeHead {
kind: SyntaxKind,
text_len: TextSize,
child_hash: u32,
}
impl GreenNodeHead {
#[inline]
pub(super) fn from_child_slice(kind: SyntaxKind, children: &[GreenElement]) -> Self {
let mut hasher = FxHasher32::default();
let mut text_len: TextSize = 0.into();
for child in children {
text_len += child.text_len();
child.hash(&mut hasher);
}
Self {
kind,
text_len,
child_hash: hasher.finish() as u32,
}
}
}
/// Internal node in the immutable tree.
/// It has other nodes and tokens as children.
#[derive(Clone, PartialEq, Eq)]
pub struct GreenNode {
pub(super) data: ThinArc<GreenNodeHead, PackedGreenElement>,
}
impl std::fmt::Debug for GreenNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.data.with_arc(|data| data.fmt(f))
}
}
impl GreenNode {
/// Creates a new node.
#[inline]
pub fn new<I>(kind: SyntaxKind, children: I) -> GreenNode
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
let mut hasher = FxHasher32::default();
let mut text_len: TextSize = 0.into();
let children = children
.into_iter()
.inspect(|it| {
text_len += it.text_len();
it.hash(&mut hasher);
})
.map(PackedGreenElement::from);
let header = HeaderWithLength::new(
GreenNodeHead {
kind,
text_len: 0.into(),
child_hash: 0,
},
children.len(),
);
let mut data = Arc::from_header_and_iter(header, children);
// XXX: fixup `text_len` and `child_hash` after construction, because
// we can't iterate `children` twice.
let header = &mut Arc::get_mut(&mut data).unwrap().header.header;
header.text_len = text_len;
header.child_hash = hasher.finish() as u32;
GreenNode {
data: Arc::into_thin(data),
}
}
#[inline]
pub(super) fn from_head_and_children<I>(header: GreenNodeHead, children: I) -> GreenNode
where
I: IntoIterator<Item = GreenElement>,
I::IntoIter: ExactSizeIterator,
{
let children = children.into_iter().map(PackedGreenElement::from);
let header = HeaderWithLength::new(header, children.len());
GreenNode {
data: Arc::into_thin(Arc::from_header_and_iter(header, children)),
}
}
/// Kind of this node.
#[inline]
pub fn kind(&self) -> SyntaxKind {
self.data.header.header.kind
}
/// Returns the length of the text covered by this node.
#[inline]
pub fn text_len(&self) -> TextSize {
self.data.header.header.text_len
}
/// Children of this node.
#[inline]
pub fn children(&self) -> Children<'_> {
Children {
inner: self.data.slice.iter(),
}
}
pub(crate) fn ptr(&self) -> *const u8 {
let r: &HeaderSlice<_, _> = &self.data;
r as *const _ as _
}
}
impl Hash for GreenNode {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.data.header.header.hash(state);
}
}
#[derive(Debug, Clone)]
pub struct Children<'a> {
inner: slice::Iter<'a, PackedGreenElement>,
}
// NB: forward everything stable that iter::Slice specializes as of Rust 1.39.0
impl ExactSizeIterator for Children<'_> {
#[inline(always)]
fn len(&self) -> usize {
self.inner.len()
}
}
impl<'a> Iterator for Children<'a> {
type Item = GreenElementRef<'a>;
#[inline]
fn next(&mut self) -> Option<GreenElementRef<'a>> {
self.inner.next().map(PackedGreenElement::as_ref)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
#[inline]
fn count(self) -> usize
where
Self: Sized,
{
self.inner.count()
}
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.inner.nth(n).map(PackedGreenElement::as_ref)
}
#[inline]
fn last(mut self) -> Option<Self::Item>
where
Self: Sized,
{
self.next_back()
}
#[inline]
fn fold<Acc, Fold>(mut self, init: Acc, mut f: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
let mut accum = init;
while let Some(x) = self.next() {
accum = f(accum, x);
}
accum
}
}
impl<'a> DoubleEndedIterator for Children<'a> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.inner.next_back().map(PackedGreenElement::as_ref)
}
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
self.inner.nth_back(n).map(PackedGreenElement::as_ref)
}
#[inline]
fn rfold<Acc, Fold>(mut self, init: Acc, mut f: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
let mut accum = init;
while let Some(x) = self.next_back() {
accum = f(accum, x);
}
accum
}
}
impl FusedIterator for Children<'_> {}

src/green/token.rs Normal file (120 lines)

@@ -0,0 +1,120 @@
use servo_arc::Arc;
use std::{fmt, hash, mem::ManuallyDrop, ptr};
use crate::{green::SyntaxKind, interning::Resolver, TextSize};
use lasso::Spur;
#[repr(align(2))] // NB: this is an at-least annotation
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub struct GreenTokenData {
pub kind: SyntaxKind,
pub text: Spur,
pub text_len: TextSize,
}
/// Leaf node in the immutable tree.
pub struct GreenToken {
ptr: ptr::NonNull<GreenTokenData>,
}
unsafe impl Send for GreenToken {} // where GreenTokenData: Send + Sync
unsafe impl Sync for GreenToken {} // where GreenTokenData: Send + Sync
impl GreenToken {
fn add_tag(ptr: ptr::NonNull<GreenTokenData>) -> ptr::NonNull<GreenTokenData> {
unsafe {
let ptr = ((ptr.as_ptr() as usize) | 1) as *mut GreenTokenData;
ptr::NonNull::new_unchecked(ptr)
}
}
fn remove_tag(ptr: ptr::NonNull<GreenTokenData>) -> ptr::NonNull<GreenTokenData> {
unsafe {
let ptr = ((ptr.as_ptr() as usize) & !1) as *mut GreenTokenData;
ptr::NonNull::new_unchecked(ptr)
}
}
fn data(&self) -> &GreenTokenData {
unsafe { &*Self::remove_tag(self.ptr).as_ptr() }
}
/// Creates a new token.
#[inline]
pub fn new(data: GreenTokenData) -> GreenToken {
let ptr = Arc::into_raw(Arc::new(data));
let ptr = ptr::NonNull::new(ptr as *mut _).unwrap();
GreenToken {
ptr: Self::add_tag(ptr),
}
}
/// Kind of this Token.
#[inline]
pub fn kind(&self) -> SyntaxKind {
self.data().kind
}
/// Text of this Token.
#[inline]
pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str
where
I: Resolver + ?Sized,
{
resolver.resolve(&self.data().text)
}
/// Returns the length of the text covered by this token.
#[inline]
pub fn text_len(&self) -> TextSize {
self.data().text_len
}
}
impl fmt::Debug for GreenToken {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let data = self.data();
f.debug_struct("GreenToken")
.field("kind", &data.kind)
.field("text", &data.text)
.finish()
}
}
impl Clone for GreenToken {
fn clone(&self) -> Self {
let ptr = Self::remove_tag(self.ptr);
let ptr = unsafe {
let arc = ManuallyDrop::new(Arc::from_raw(ptr.as_ptr()));
Arc::into_raw(Arc::clone(&arc))
};
let ptr = ptr::NonNull::new(ptr as *mut _).unwrap();
GreenToken {
ptr: Self::add_tag(ptr),
}
}
}
impl Eq for GreenToken {}
impl PartialEq for GreenToken {
fn eq(&self, other: &Self) -> bool {
self.data() == other.data()
}
}
impl hash::Hash for GreenToken {
fn hash<H>(&self, state: &mut H)
where
H: hash::Hasher,
{
self.data().hash(state)
}
}
impl Drop for GreenToken {
fn drop(&mut self) {
unsafe {
Arc::from_raw(Self::remove_tag(self.ptr).as_ptr());
}
}
}
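The `add_tag` / `remove_tag` pair above relies on `GreenTokenData` being at least 2-aligned (`#[repr(align(2))]`), which frees the low pointer bit to mark "this pointer is a token" so that `PackedGreenElement::is_node` can tell nodes and tokens apart. A self-contained sketch of the same low-bit tagging trick (plain Rust, not cstree code):

```rust
// Low-bit pointer tagging: a 2-aligned pointee guarantees the low
// address bit is 0, so that bit can carry one bit of type information.
#[repr(align(2))]
struct Aligned(u16);

fn main() {
    let raw = Box::into_raw(Box::new(Aligned(42))) as usize;
    assert_eq!(raw & 1, 0); // alignment keeps the low bit clear

    let tagged = raw | 1; // "add_tag": mark the pointer as a token
    assert_eq!(tagged & 1, 1); // an `is_node` check would now return false

    let untagged = tagged & !1; // "remove_tag": recover the real pointer
    // SAFETY: `untagged` is exactly the pointer returned by `Box::into_raw`.
    let back = unsafe { Box::from_raw(untagged as *mut Aligned) };
    assert_eq!(back.0, 42);
}
```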

src/lib.rs Normal file (67 lines)

@@ -0,0 +1,67 @@
//! `cstree` is a generic library for creating and working with concrete syntax trees.
//! The concept of CSTs is inspired in part by Swift's
//! [libsyntax](https://github.com/apple/swift/tree/5e2c815edfd758f9b1309ce07bfc01c4bc20ec23/lib/Syntax).
//!
//! The `cstree` implementation is a fork of the excellent
//! [`rowan`](https://github.com/rust-analyzer/rowan/), developed by the authors of
//! [rust-analyzer](https://github.com/rust-analyzer/rust-analyzer/).
//! While we are building our own documentation, a conceptual overview of their implementation is
//! available in the [rust-analyzer
//! repo](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#trees).
//!
//! Notable differences of `cstree` compared to `rowan`:
//! - Syntax trees (red trees) are created lazily, but are persistent. Once a node has been created,
//!   it will remain allocated, while `rowan` re-creates the red layer on the fly. Apart from the
//!   trade-off discussed
//!   [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#memoized-rednodes),
//!   this helps to achieve good tree traversal speed while enabling the points below:
//! - Syntax (red) nodes are `Send` and `Sync`, allowing realized trees to be shared across threads. This is
//!   achieved by atomically reference counting syntax trees as a whole, which also removes the need to
//!   reference count individual nodes (helping with the point above).
//! - Syntax nodes can hold custom data.
//! - `cstree` trees are trees over interned strings. This means `cstree` will deduplicate the text
//!   of tokens such as identifiers with the same name. In the same position, `rowan` stores each string
//!   individually, with a small string optimization (see [`SmolStr`](https://crates.io/crates/smol_str)).
//! - Performance optimizations for tree creation: new nodes are only allocated on the heap if they are
//!   not in the cache, and subtrees are not hashed recursively.
//!
//! See `examples/s_expressions.rs` for a tutorial.
#![forbid(
// missing_debug_implementations,
unconditional_recursion,
future_incompatible,
// missing_docs,
)]
#![deny(unsafe_code)]
#[allow(unsafe_code)]
mod green;
#[allow(unsafe_code)]
pub mod syntax;
#[cfg(feature = "serde1")]
mod serde_impls;
mod syntax_text;
mod utility_types;
pub mod interning {
pub use lasso::{Interner, Reader, Resolver};
}
use std::fmt;
// Reexport types for working with strings.
pub use text_size::{TextLen, TextRange, TextSize};
pub use crate::{
green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, SyntaxKind},
syntax::{SyntaxElement, SyntaxElementChildren, SyntaxElementRef, SyntaxNode, SyntaxNodeChildren, SyntaxToken},
syntax_text::SyntaxText,
utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
};
pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
type Kind: fmt::Debug;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
}
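
As a usage sketch, implementing `Language` comes down to a pair of conversions between a language-specific kind type and the raw `SyntaxKind`. Everything below is hypothetical (a real parser would define its own kinds); the identity mapping used by the test suite later in this commit is the degenerate case:

// Hypothetical example: a downstream crate's kind enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
enum MyKind {
    Whitespace = 0,
    Identifier,
    Root,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum MyLanguage {}

impl cstree::Language for MyLanguage {
    type Kind = MyKind;

    fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
        // Assumes `raw` was produced by `kind_to_raw` below; the `u16`
        // payload of `SyntaxKind` is public, as the tests in this commit show.
        match raw.0 {
            0 => MyKind::Whitespace,
            1 => MyKind::Identifier,
            2 => MyKind::Root,
            _ => unreachable!("unknown syntax kind: {:?}", raw),
        }
    }

    fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
        cstree::SyntaxKind(kind as u16)
    }
}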

66
src/serde_impls.rs Normal file

@ -0,0 +1,66 @@
use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer};
use std::fmt;
use crate::{Language, NodeOrToken, SyntaxNode, SyntaxToken};
struct SerDisplay<T>(T);
impl<T: fmt::Display> Serialize for SerDisplay<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_str(&self.0)
}
}
struct DisplayDebug<T>(T);
impl<T: fmt::Debug> fmt::Display for DisplayDebug<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
}
}
impl<L: Language> Serialize for SyntaxNode<L> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_map(Some(3))?;
state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
state.serialize_entry("text_range", &self.text_range())?;
state.serialize_entry("children", &Children(self))?;
state.end()
}
}
impl<L: Language> Serialize for SyntaxToken<L> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_map(Some(3))?;
state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
state.serialize_entry("text_range", &self.text_range())?;
state.serialize_entry("text", &self.text().as_str())?;
state.end()
}
}
struct Children<T>(T);
impl<L: Language> Serialize for Children<&'_ SyntaxNode<L>> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_seq(None)?;
self.0.children_with_tokens().try_for_each(|element| match element {
NodeOrToken::Node(it) => state.serialize_element(&it),
NodeOrToken::Token(it) => state.serialize_element(&it),
})?;
state.end()
}
}
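
These impls are compiled only with the `serde1` feature (see the `cfg` in `src/lib.rs`). A downstream crate would enable it in its `Cargo.toml` roughly like this (version hypothetical):

[dependencies]
cstree = { version = "0.0.2", features = ["serde1"] }

Any `Serializer` can then consume a `SyntaxNode` or `SyntaxToken` directly.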

1269
src/syntax.rs Normal file

File diff suppressed because it is too large

357
src/syntax_text.rs Normal file

@ -0,0 +1,357 @@
use std::fmt;
use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, TextSize};
#[derive(Clone)]
pub struct SyntaxText<'n, 'i, I: ?Sized, L: Language, D: 'static = ()> {
node: &'n SyntaxNode<L, D>,
range: TextRange,
resolver: &'i I,
}
impl<'n, 'i, I: Resolver + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> {
pub(crate) fn new(node: &'n SyntaxNode<L, D>, resolver: &'i I) -> Self {
let range = node.text_range();
SyntaxText { node, range, resolver }
}
pub fn len(&self) -> TextSize {
self.range.len()
}
pub fn is_empty(&self) -> bool {
self.range.is_empty()
}
pub fn contains_char(&self, c: char) -> bool {
self.try_for_each_chunk(|chunk| if chunk.contains(c) { Err(()) } else { Ok(()) })
.is_err()
}
pub fn find_char(&self, c: char) -> Option<TextSize> {
let mut acc: TextSize = 0.into();
let res = self.try_for_each_chunk(|chunk| {
if let Some(pos) = chunk.find(c) {
let pos: TextSize = (pos as u32).into();
return Err(acc + pos);
}
acc += TextSize::of(chunk);
Ok(())
});
found(res)
}
pub fn char_at(&self, offset: TextSize) -> Option<char> {
let mut start: TextSize = 0.into();
let res = self.try_for_each_chunk(|chunk| {
let end = start + TextSize::of(chunk);
if start <= offset && offset < end {
let off: usize = u32::from(offset - start) as usize;
return Err(chunk[off..].chars().next().unwrap());
}
start = end;
Ok(())
});
found(res)
}
pub fn slice<R: private::SyntaxTextRange>(&self, range: R) -> Self {
let start = range.start().unwrap_or_default();
let end = range.end().unwrap_or(self.len());
assert!(start <= end);
let len = end - start;
let start = self.range.start() + start;
let end = start + len;
assert!(
start <= end,
"invalid slice, range: {:?}, slice: {:?}",
self.range,
(range.start(), range.end()),
);
let range = TextRange::new(start, end);
assert!(
self.range.contains_range(range),
"invalid slice, range: {:?}, slice: {:?}",
self.range,
range,
);
SyntaxText {
node: self.node,
range,
resolver: self.resolver,
}
}
pub fn try_fold_chunks<T, F, E>(&self, init: T, mut f: F) -> Result<T, E>
where
F: FnMut(T, &str) -> Result<T, E>,
{
self.tokens_with_ranges().try_fold(init, move |acc, (token, range)| {
f(acc, &token.text(self.resolver)[range])
})
}
pub fn try_for_each_chunk<F: FnMut(&str) -> Result<(), E>, E>(&self, mut f: F) -> Result<(), E> {
self.try_fold_chunks((), move |(), chunk| f(chunk))
}
pub fn for_each_chunk<F: FnMut(&str)>(&self, mut f: F) {
enum Void {}
match self.try_for_each_chunk(|chunk| Ok::<(), Void>(f(chunk))) {
Ok(()) => (),
Err(void) => match void {},
}
}
fn tokens_with_ranges(&self) -> impl Iterator<Item = (&SyntaxToken<L, D>, TextRange)> {
let text_range = self.range;
self.node
.descendants_with_tokens()
.filter_map(|element| element.into_token())
.filter_map(move |token| {
let token_range = token.text_range();
let range = text_range.intersect(token_range)?;
Some((token, range - token_range.start()))
})
}
}
fn found<T>(res: Result<(), T>) -> Option<T> {
match res {
Ok(()) => None,
Err(it) => Some(it),
}
}
impl<I: Resolver + ?Sized, L: Language, D> fmt::Debug for SyntaxText<'_, '_, I, L, D> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.to_string(), f)
}
}
impl<I: Resolver + ?Sized, L: Language, D> fmt::Display for SyntaxText<'_, '_, I, L, D> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f))
}
}
impl<I: Resolver + ?Sized, L: Language, D> From<SyntaxText<'_, '_, I, L, D>> for String {
fn from(text: SyntaxText<'_, '_, I, L, D>) -> String {
text.to_string()
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<str> for SyntaxText<'_, '_, I, L, D> {
fn eq(&self, mut rhs: &str) -> bool {
self.try_for_each_chunk(|chunk| {
if !rhs.starts_with(chunk) {
return Err(());
}
rhs = &rhs[chunk.len()..];
Ok(())
})
.is_ok()
&& rhs.is_empty()
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for str {
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
rhs == self
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> {
fn eq(&self, rhs: &&str) -> bool {
self == *rhs
}
}
impl<I: Resolver + ?Sized, L: Language, D> PartialEq<SyntaxText<'_, '_, I, L, D>> for &'_ str {
fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool {
rhs == self
}
}
impl<'n1, 'i1, 'n2, 'i2, I1, I2, D1, D2, L1, L2> PartialEq<SyntaxText<'n2, 'i2, I2, L2, D2>>
for SyntaxText<'n1, 'i1, I1, L1, D1>
where
L1: Language,
L2: Language,
I1: Resolver + ?Sized,
I2: Resolver + ?Sized,
{
fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool {
if self.range.len() != other.range.len() {
return false;
}
let mut lhs = self.tokens_with_ranges();
let mut rhs = other.tokens_with_ranges();
zip_texts(&mut lhs, &mut rhs, self.resolver, other.resolver).is_none()
&& lhs.all(|it| it.1.is_empty())
&& rhs.all(|it| it.1.is_empty())
}
}
fn zip_texts<'it1, 'it2, It1, It2, I1, I2, L1, L2, D1, D2>(
xs: &mut It1,
ys: &mut It2,
resolver_x: &I1,
resolver_y: &I2,
) -> Option<()>
where
It1: Iterator<Item = (&'it1 SyntaxToken<L1, D1>, TextRange)>,
It2: Iterator<Item = (&'it2 SyntaxToken<L2, D2>, TextRange)>,
I1: Resolver + ?Sized,
I2: Resolver + ?Sized,
D1: 'static,
D2: 'static,
L1: Language + 'it1,
L2: Language + 'it2,
{
let mut x = xs.next()?;
let mut y = ys.next()?;
loop {
while x.1.is_empty() {
x = xs.next()?;
}
while y.1.is_empty() {
y = ys.next()?;
}
let x_text = &x.0.text(resolver_x)[x.1];
let y_text = &y.0.text(resolver_y)[y.1];
if !(x_text.starts_with(y_text) || y_text.starts_with(x_text)) {
return Some(());
}
let advance = std::cmp::min(x.1.len(), y.1.len());
x.1 = TextRange::new(x.1.start() + advance, x.1.end());
y.1 = TextRange::new(y.1.start() + advance, y.1.end());
}
}
impl<I: Resolver + ?Sized, L: Language, D> Eq for SyntaxText<'_, '_, I, L, D> {}
mod private {
use std::ops;
use crate::{TextRange, TextSize};
pub trait SyntaxTextRange {
fn start(&self) -> Option<TextSize>;
fn end(&self) -> Option<TextSize>;
}
impl SyntaxTextRange for TextRange {
fn start(&self) -> Option<TextSize> {
Some(TextRange::start(*self))
}
fn end(&self) -> Option<TextSize> {
Some(TextRange::end(*self))
}
}
impl SyntaxTextRange for ops::Range<TextSize> {
fn start(&self) -> Option<TextSize> {
Some(self.start)
}
fn end(&self) -> Option<TextSize> {
Some(self.end)
}
}
impl SyntaxTextRange for ops::RangeFrom<TextSize> {
fn start(&self) -> Option<TextSize> {
Some(self.start)
}
fn end(&self) -> Option<TextSize> {
None
}
}
impl SyntaxTextRange for ops::RangeTo<TextSize> {
fn start(&self) -> Option<TextSize> {
None
}
fn end(&self) -> Option<TextSize> {
Some(self.end)
}
}
impl SyntaxTextRange for ops::RangeFull {
fn start(&self) -> Option<TextSize> {
None
}
fn end(&self) -> Option<TextSize> {
None
}
}
}
#[cfg(test)]
mod tests {
use crate::{green::SyntaxKind, GreenNodeBuilder};
use super::*;
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum TestLang {}
impl Language for TestLang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
raw
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
kind
}
}
fn build_tree(chunks: &[&str]) -> (SyntaxNode<TestLang, ()>, impl Resolver) {
let mut builder = GreenNodeBuilder::new();
builder.start_node(SyntaxKind(62));
for &chunk in chunks.iter() {
    builder.token(SyntaxKind(92), chunk);
}
builder.finish_node();
let (node, interner) = builder.finish();
(SyntaxNode::new_root(node), interner.unwrap())
}
#[test]
fn test_text_equality() {
fn do_check(t1: &[&str], t2: &[&str]) {
let (t1, resolver) = build_tree(t1);
let t1 = t1.text(&resolver);
let (t2, resolver) = build_tree(t2);
let t2 = t2.text(&resolver);
let expected = t1.to_string() == t2.to_string();
let actual = t1 == t2;
assert_eq!(expected, actual, "`{}` (SyntaxText) `{}` (SyntaxText)", t1, t2);
let actual = t1 == &*t2.to_string();
assert_eq!(expected, actual, "`{}` (SyntaxText) `{}` (&str)", t1, t2);
}
fn check(t1: &[&str], t2: &[&str]) {
do_check(t1, t2);
do_check(t2, t1)
}
check(&[""], &[""]);
check(&["a"], &[""]);
check(&["a"], &["a"]);
check(&["abc"], &["def"]);
check(&["hello", "world"], &["hello", "world"]);
check(&["hellowo", "rld"], &["hell", "oworld"]);
check(&["hel", "lowo", "rld"], &["helloworld"]);
check(&["{", "abc", "}"], &["{", "123", "}"]);
check(&["{", "abc", "}", "{"], &["{", "123", "}"]);
check(&["{", "abc", "}"], &["{", "123", "}", "{"]);
check(&["{", "abc", "}ab"], &["{", "abc", "}", "ab"]);
}
}
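
A hypothetical extra test sketches typical use of the API above, reusing this module's `build_tree` helper (all expected values follow from the chunk layout):

#[test]
fn usage_sketch() {
    let (root, resolver) = build_tree(&["hello", " ", "world"]);
    let text = root.text(&resolver);
    assert_eq!(text.len(), TextSize::from(11));
    assert!(text.contains_char('w'));
    assert_eq!(text.find_char(' '), Some(TextSize::from(5)));
    assert_eq!(text.char_at(TextSize::from(6)), Some('w'));
    // Chunks are the token texts intersecting the `SyntaxText`'s range.
    let mut chunks = Vec::new();
    text.for_each_chunk(|chunk| chunks.push(chunk.to_string()));
    assert_eq!(chunks, ["hello", " ", "world"]);
    assert_eq!(text.slice(TextSize::from(6)..), "world");
}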

142
src/utility_types.rs Normal file

@ -0,0 +1,142 @@
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NodeOrToken<N, T> {
Node(N),
Token(T),
}
impl<N, T> NodeOrToken<N, T> {
pub fn into_node(self) -> Option<N> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn into_token(self) -> Option<T> {
match self {
NodeOrToken::Node(_) => None,
NodeOrToken::Token(token) => Some(token),
}
}
pub fn as_node(&self) -> Option<&N> {
match self {
NodeOrToken::Node(node) => Some(node),
NodeOrToken::Token(_) => None,
}
}
pub fn as_token(&self) -> Option<&T> {
match self {
NodeOrToken::Node(_) => None,
NodeOrToken::Token(token) => Some(token),
}
}
pub(crate) fn as_ref(&self) -> NodeOrToken<&N, &T> {
match self {
NodeOrToken::Node(node) => NodeOrToken::Node(node),
NodeOrToken::Token(token) => NodeOrToken::Token(token),
}
}
}
impl<N: Clone, T: Clone> NodeOrToken<&N, &T> {
pub(crate) fn cloned(&self) -> NodeOrToken<N, T> {
match *self {
NodeOrToken::Node(node) => NodeOrToken::Node(node.clone()),
NodeOrToken::Token(token) => NodeOrToken::Token(token.clone()),
}
}
}
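
A quick sketch of the accessors, with hypothetical payloads standing in for real nodes and tokens:

// `NodeOrToken` is a plain either-type over the two element kinds.
let element: NodeOrToken<&str, u32> = NodeOrToken::Node("a node");
assert_eq!(element.as_node(), Some(&"a node"));
assert_eq!(element.into_node(), Some("a node"));
assert_eq!(NodeOrToken::<&str, u32>::Token(42).into_token(), Some(42));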
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Direction {
Next,
Prev,
}
/// `WalkEvent` describes the process of walking a tree.
#[derive(Debug, Copy, Clone)]
pub enum WalkEvent<T> {
/// Fired before traversing the node.
Enter(T),
/// Fired after the node is traversed.
Leave(T),
}
impl<T> WalkEvent<T> {
pub fn map<F: FnOnce(T) -> U, U>(self, f: F) -> WalkEvent<U> {
match self {
WalkEvent::Enter(it) => WalkEvent::Enter(f(it)),
WalkEvent::Leave(it) => WalkEvent::Leave(f(it)),
}
}
}
/// There might be zero, one or two leaves at a given offset.
#[derive(Clone, Debug)]
pub enum TokenAtOffset<T> {
/// No leaves at offset -- possible for the empty file.
None,
/// Only a single leaf at offset.
Single(T),
/// Offset is exactly between two leaves.
Between(T, T),
}
impl<T> TokenAtOffset<T> {
pub fn map<F: Fn(T) -> U, U>(self, f: F) -> TokenAtOffset<U> {
match self {
TokenAtOffset::None => TokenAtOffset::None,
TokenAtOffset::Single(it) => TokenAtOffset::Single(f(it)),
TokenAtOffset::Between(l, r) => TokenAtOffset::Between(f(l), f(r)),
}
}
/// Convert to option, preferring the right leaf in case of a tie.
pub fn right_biased(self) -> Option<T> {
match self {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => Some(node),
TokenAtOffset::Between(_, right) => Some(right),
}
}
/// Convert to option, preferring the left leaf in case of a tie.
pub fn left_biased(self) -> Option<T> {
match self {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => Some(node),
TokenAtOffset::Between(left, _) => Some(left),
}
}
}
impl<T> Iterator for TokenAtOffset<T> {
type Item = T;
fn next(&mut self) -> Option<T> {
match std::mem::replace(self, TokenAtOffset::None) {
TokenAtOffset::None => None,
TokenAtOffset::Single(node) => {
*self = TokenAtOffset::None;
Some(node)
}
TokenAtOffset::Between(left, right) => {
*self = TokenAtOffset::Single(right);
Some(left)
}
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
match self {
TokenAtOffset::None => (0, Some(0)),
TokenAtOffset::Single(_) => (1, Some(1)),
TokenAtOffset::Between(_, _) => (2, Some(2)),
}
}
}
impl<T> ExactSizeIterator for TokenAtOffset<T> {}
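
A short sketch of the iteration order, using hypothetical string tokens:

// Iterating a `Between` yields the left leaf first, then the right one.
let at_offset = TokenAtOffset::Between("left", "right");
assert_eq!(at_offset.len(), 2);
assert_eq!(at_offset.clone().left_biased(), Some("left"));
assert_eq!(at_offset.collect::<Vec<_>>(), ["left", "right"]);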

100
tests/basic.rs Normal file

@ -0,0 +1,100 @@
mod common;
use common::TestLang;
use cstree::{GreenNodeBuilder, SyntaxKind, SyntaxNode, TextRange};
use lasso::Resolver;
#[derive(Debug)]
enum Element<'s> {
Node(Vec<Element<'s>>),
Token(&'s str),
}
fn two_level_tree() -> Element<'static> {
use Element::*;
Node(vec![
Node(vec![Token("0.0"), Token("0.1")]),
Node(vec![Token("1.0")]),
Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]),
])
}
fn build_tree<D>(root: &Element<'_>) -> (SyntaxNode<TestLang, D>, impl Resolver) {
let mut builder = GreenNodeBuilder::new();
build_recursive(root, &mut builder, 0);
let (node, interner) = builder.finish();
(SyntaxNode::new_root(node), interner.unwrap())
}
fn build_recursive(root: &Element<'_>, builder: &mut GreenNodeBuilder, mut from: u16) -> u16 {
match root {
Element::Node(children) => {
builder.start_node(SyntaxKind(from));
for child in children {
from = build_recursive(child, builder, from + 1);
}
builder.finish_node();
}
Element::Token(text) => {
builder.token(SyntaxKind(from), *text);
}
}
from
}
#[test]
fn create() {
let tree = two_level_tree();
let (tree, resolver) = build_tree::<()>(&tree);
assert_eq!(tree.syntax_kind(), SyntaxKind(0));
assert_eq!(tree.kind(), SyntaxKind(0));
{
let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().nth(0).unwrap();
let leaf1_0 = leaf1_0.into_token().unwrap();
assert_eq!(leaf1_0.syntax_kind(), SyntaxKind(5));
assert_eq!(leaf1_0.kind(), SyntaxKind(5));
assert_eq!(leaf1_0.text(&resolver), "1.0");
assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into()));
}
{
let node2 = tree.children().nth(2).unwrap();
assert_eq!(node2.syntax_kind(), SyntaxKind(6));
assert_eq!(node2.kind(), SyntaxKind(6));
assert_eq!(node2.children_with_tokens().count(), 3);
assert_eq!(node2.text(&resolver), "2.02.12.2");
}
}
#[test]
fn data() {
let tree = two_level_tree();
let (tree, _resolver) = build_tree::<String>(&tree);
{
let node2 = tree.children().nth(2).unwrap();
assert_eq!(*node2.try_set_data("data".into()).unwrap(), "data");
let data = node2.get_data().unwrap();
assert_eq!(data.as_str(), "data");
node2.set_data("payload".into());
let data = node2.get_data().unwrap();
assert_eq!(data.as_str(), "payload");
}
{
let node2 = tree.children().nth(2).unwrap();
assert!(node2.try_set_data("already present".into()).is_err());
let data = node2.get_data().unwrap();
assert_eq!(data.as_str(), "payload");
node2.set_data("new data".into());
}
{
let node2 = tree.children().nth(2).unwrap();
let data = node2.get_data().unwrap();
assert_eq!(data.as_str(), "new data");
node2.clear_data();
// re-use `data` after node data was cleared
assert_eq!(data.as_str(), "new data");
}
{
let node2 = tree.children().nth(2).unwrap();
assert_eq!(node2.get_data(), None);
}
}

15
tests/common.rs Normal file

@ -0,0 +1,15 @@
use cstree::{Language, SyntaxKind};
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum TestLang {}
impl Language for TestLang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: SyntaxKind) -> Self::Kind {
raw
}
fn kind_to_raw(kind: Self::Kind) -> SyntaxKind {
kind
}
}

35
vendor/servo_arc/Cargo.toml vendored Normal file

@ -0,0 +1,35 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "servo_arc"
version = "0.1.1"
authors = ["The Servo Project Developers"]
description = "A fork of std::sync::Arc with some extra functionality and without weak references"
license = "MIT/Apache-2.0"
repository = "https://github.com/servo/servo"
[lib]
name = "servo_arc"
path = "lib.rs"
[dependencies.nodrop]
version = "0.1.8"
[dependencies.serde]
version = "1.0"
optional = true
[dependencies.stable_deref_trait]
version = "1.0.0"
[features]
servo = ["serde"]

1007
vendor/servo_arc/lib.rs vendored Normal file

File diff suppressed because it is too large