From ac4f6594705da012424ef3c8948bab113ef7e4bb Mon Sep 17 00:00:00 2001 From: Domenic Quirl Date: Wed, 13 Jan 2021 10:23:34 +0100 Subject: [PATCH] initial commit --- .gitignore | 4 + Cargo.lock | 241 +++++++ Cargo.toml | 23 + LICENSE-APACHE | 201 ++++++ LICENSE-MIT | 23 + README.md | 21 + examples/math.rs | 165 +++++ examples/s_expressions.rs | 456 +++++++++++++ rustfmt.toml | 21 + src/green.rs | 41 ++ src/green/builder.rs | 225 +++++++ src/green/element.rs | 212 ++++++ src/green/node.rs | 220 ++++++ src/green/token.rs | 120 ++++ src/lib.rs | 67 ++ src/serde_impls.rs | 66 ++ src/syntax.rs | 1269 +++++++++++++++++++++++++++++++++++ src/syntax_text.rs | 357 ++++++++++ src/utility_types.rs | 142 ++++ tests/basic.rs | 100 +++ tests/common.rs | 15 + vendor/servo_arc/Cargo.toml | 35 + vendor/servo_arc/lib.rs | 1007 +++++++++++++++++++++++++++ 23 files changed, 5031 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 examples/math.rs create mode 100644 examples/s_expressions.rs create mode 100644 rustfmt.toml create mode 100644 src/green.rs create mode 100644 src/green/builder.rs create mode 100644 src/green/element.rs create mode 100644 src/green/node.rs create mode 100644 src/green/token.rs create mode 100644 src/lib.rs create mode 100644 src/serde_impls.rs create mode 100644 src/syntax.rs create mode 100644 src/syntax_text.rs create mode 100644 src/utility_types.rs create mode 100644 tests/basic.rs create mode 100644 tests/common.rs create mode 100644 vendor/servo_arc/Cargo.toml create mode 100644 vendor/servo_arc/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6b4fd4c --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.vscode + +target +*checksum* \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..87176f0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,241 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "ahash" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" + +[[package]] +name = "aho-corasick" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada" +dependencies = [ + "memchr", +] + +[[package]] +name = "byteorder" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cstree" +version = "0.0.2" +dependencies = [ + "fxhash", + "lasso", + "m_lexer", + "parking_lot", + "serde", + "servo_arc", + "smallvec", + "text-size", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +dependencies = [ + "ahash", +] + +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lasso" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17823787ed7c3f2ce99d4865d41edd4407b2fb6d9e71d534ec69d832a3ec2df3" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929" + +[[package]] +name = "lock_api" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "m_lexer" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7e51ebf91162d585a5bae05e4779efc4a276171cb880d61dd6fab11c98467a7" +dependencies = [ + "regex", +] + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ccb628cad4f84851442432c60ad8e1f607e29752d0bf072cbd0baf28aa34272" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "regex" +version = "1.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6946991529684867e47d86474e3a6d0c0ab9b82d5821e314b1ede31fa3a4b3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399" + +[[package]] +name = "servo_arc" +version = "0.1.1" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "text-size" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03e7efdedc3bc78cb2337f1e2785c39e45f5ef762d9e4ebb137fff7380a6d8a" +dependencies = [ + "serde", +] + +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..13dda96 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] +edition = "2018" +name = "cstree" +version = "0.0.2" +authors = ["Domenic Quirl ", "Aleksey Kladov "] +description = "Library for generic lossless syntax trees" +license = "MIT OR Apache-2.0" +repository = "https://github.com/domenicquirl/cstree" + +[dependencies] +serde = { version = "1.0.89", optional = true, default-features = false } +lasso = "0.4.1" +text-size = "1.0.0" +fxhash= 
"0.2.1" +smallvec = "1.6.1" +servo_arc = { path = "vendor/servo_arc" } +parking_lot= "0.11.1" + +[dev-dependencies] +m_lexer = "0.0.4" + +[features] +serde1 = ["serde", "text-size/serde"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..31aa793
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,23 @@
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..79bf21b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# `cstree`
+
+`cstree` is a library for creating and working with concrete syntax trees (CSTs).
+The concept of CSTs is inspired in part by Swift's [libsyntax](https://github.com/apple/swift/tree/5e2c815edfd758f9b1309ce07bfc01c4bc20ec23/lib/Syntax).
+
+The `cstree` implementation is a fork of the excellent [`rowan`](https://github.com/rust-analyzer/rowan/), developed by the authors of [rust-analyzer](https://github.com/rust-analyzer/rust-analyzer/).
+While we are building our own documentation, a conceptual overview of their implementation is available in the [rust-analyzer repo](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#trees).
+
+Notable differences between `cstree` and `rowan`:
+ - Syntax trees (red trees) are created lazily, but are persistent. Once a node has been created, it will remain allocated, while `rowan` re-creates the red layer on the fly. Apart from the trade-off discussed [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#memoized-rednodes), this helps to achieve good tree traversal speed while enabling the points below:
+ - Syntax (red) nodes are `Send` and `Sync`, allowing realized trees to be shared across threads. This is achieved by atomically reference counting syntax trees as a whole, which also gets rid of the need to reference count individual nodes (helping with the point above).
+ - Syntax nodes can hold custom data.
+ - `cstree` trees are trees over interned strings. This means `cstree` will deduplicate the text of tokens such as identifiers with the same name. In this position, `rowan` stores each string, with a small string optimization (see [`SmolStr`](https://crates.io/crates/smol_str)). + - Performance optimizations for tree creation: only allocate new nodes on the heap if they are not in cache, avoid recursively hashing subtrees + +See `examples/s_expressions` for a tutorial. +## License + +`cstree` is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0). + +See `LICENSE-APACHE` and `LICENSE-MIT` for details. diff --git a/examples/math.rs b/examples/math.rs new file mode 100644 index 0000000..6e56a81 --- /dev/null +++ b/examples/math.rs @@ -0,0 +1,165 @@ +//! Example that takes the input +//! 1 + 2 * 3 + 4 +//! and builds the tree +//! - Marker(Root) +//! - Marker(Operation) +//! - Marker(Operation) +//! - "1" Token(Number) +//! - "+" Token(Add) +//! - Marker(Operation) +//! - "2" Token(Number) +//! - "*" Token(Mul) +//! - "3" Token(Number) +//! - "+" Token(Add) +//! - "4" Token(Number) + +use cstree::{ + interning::{Reader, Resolver}, + GreenNodeBuilder, NodeOrToken, +}; +use std::iter::Peekable; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(non_camel_case_types)] +#[repr(u16)] +enum SyntaxKind { + WHITESPACE = 0, + + ADD, + SUB, + MUL, + DIV, + + NUMBER, + ERROR, + OPERATION, + ROOT, +} +use SyntaxKind::*; + +impl From for cstree::SyntaxKind { + fn from(kind: SyntaxKind) -> Self { + Self(kind as u16) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum Lang {} +impl cstree::Language for Lang { + type Kind = SyntaxKind; + + fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind { + assert!(raw.0 <= ROOT as u16); + unsafe { std::mem::transmute::(raw.0) } + } + + fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind { + kind.into() + } +} + +type SyntaxNode = cstree::SyntaxNode; +#[allow(unused)] +type SyntaxToken = cstree::SyntaxToken; +#[allow(unused)] +type SyntaxElement = cstree::NodeOrToken; +type SyntaxElementRef<'a> = cstree::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>; + +struct Parser<'input, I: Iterator> { + builder: GreenNodeBuilder<'static>, + iter: Peekable, +} +impl<'input, I: Iterator> Parser<'input, I> { + fn peek(&mut self) -> Option { + while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) { + self.bump(); + } + self.iter.peek().map(|&(t, _)| t) + } + + fn bump(&mut self) { + if let Some((token, string)) = self.iter.next() { + self.builder.token(token.into(), string); + } + } + + fn parse_val(&mut self) { + match self.peek() { + Some(NUMBER) => self.bump(), + _ => { + self.builder.start_node(ERROR.into()); + self.bump(); + self.builder.finish_node(); + } + } + } + + fn handle_operation(&mut self, tokens: &[SyntaxKind], next: fn(&mut Self)) { + let checkpoint = self.builder.checkpoint(); + next(self); + while self.peek().map(|t| tokens.contains(&t)).unwrap_or(false) { + self.builder.start_node_at(checkpoint, OPERATION.into()); + self.bump(); + next(self); + self.builder.finish_node(); + } + } + + fn parse_mul(&mut self) { + self.handle_operation(&[MUL, DIV], Self::parse_val) + } + + fn parse_add(&mut self) { + self.handle_operation(&[ADD, SUB], Self::parse_mul) + } + + fn parse(mut self) -> (SyntaxNode, impl Resolver) { + self.builder.start_node(ROOT.into()); + self.parse_add(); + self.builder.finish_node(); + + let (tree, resolver) = 
self.builder.finish();
+        (SyntaxNode::new_root(tree), resolver.unwrap().into_resolver())
+    }
+}
+
+fn print(indent: usize, element: SyntaxElementRef<'_>, resolver: &impl Resolver) {
+    let kind: SyntaxKind = element.kind().into();
+    print!("{:indent$}", "", indent = indent);
+    match element {
+        NodeOrToken::Node(node) => {
+            println!("- {:?}", kind);
+            for child in node.children_with_tokens() {
+                print(indent + 2, child, resolver);
+            }
+        }
+
+        NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(resolver), kind),
+    }
+}
+
+fn main() {
+    let (ast, resolver) = Parser {
+        builder: GreenNodeBuilder::new(),
+        iter: vec![
+            // 1 + 2 * 3 + 4
+            (NUMBER, "1".into()),
+            (WHITESPACE, " ".into()),
+            (ADD, "+".into()),
+            (WHITESPACE, " ".into()),
+            (NUMBER, "2".into()),
+            (WHITESPACE, " ".into()),
+            (MUL, "*".into()),
+            (WHITESPACE, " ".into()),
+            (NUMBER, "3".into()),
+            (WHITESPACE, " ".into()),
+            (ADD, "+".into()),
+            (WHITESPACE, " ".into()),
+            (NUMBER, "4".into()),
+        ]
+        .into_iter()
+        .peekable(),
+    }
+    .parse();
+    print(0, (&ast).into(), &resolver);
+}
diff --git a/examples/s_expressions.rs b/examples/s_expressions.rs
new file mode 100644
index 0000000..5142283
--- /dev/null
+++ b/examples/s_expressions.rs
@@ -0,0 +1,456 @@
+//! In this tutorial, we will write a parser
+//! and evaluator of arithmetic S-expressions,
+//! which look like this:
+//! ```
+//! (+ (* 15 2) 62)
+//! ```
+//!
+//! It's suggested to read the conceptual overview of the design
+//! alongside this tutorial:
+//! https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md

+/// cstree uses the `TextSize` and `TextRange` types to
+/// represent UTF-8 offsets and ranges.

+/// Let's start with defining all kinds of tokens and
+/// composite nodes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[allow(non_camel_case_types)]
+#[repr(u16)]
+enum SyntaxKind {
+    L_PAREN = 0, // '('
+    R_PAREN,     // ')'
+    WORD,        // '+', '15'
+    WHITESPACE,  // whitespace is explicit
+    ERROR,       // as well as errors
+
+    // composite nodes
+    LIST, // `(+ 2 3)`
+    ATOM, // `+`, `15`, wraps a WORD token
+    ROOT, // top-level node: a list of s-expressions
+}
+use SyntaxKind::*;
+
+/// Some boilerplate is needed, as cstree settled on using its own
+/// `struct SyntaxKind(u16)` internally, instead of accepting the
+/// user's `enum SyntaxKind` as a type parameter.
+///
+/// First, to easily pass the enum variants into cstree via `.into()`:
+impl From<SyntaxKind> for cstree::SyntaxKind {
+    fn from(kind: SyntaxKind) -> Self {
+        Self(kind as u16)
+    }
+}
+
+/// Second, implementing the `Language` trait teaches cstree to convert between
+/// these two SyntaxKind types, allowing for a nicer SyntaxNode API where
+/// "kinds" are values from our `enum SyntaxKind`, instead of plain u16 values.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+enum Lang {}
+impl cstree::Language for Lang {
+    type Kind = SyntaxKind;
+
+    fn kind_from_raw(raw: cstree::SyntaxKind) -> Self::Kind {
+        assert!(raw.0 <= ROOT as u16);
+        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
+    }
+
+    fn kind_to_raw(kind: Self::Kind) -> cstree::SyntaxKind {
+        kind.into()
+    }
+}
+
+/// GreenNode is an immutable tree, which is cheap to change,
+/// but doesn't contain offsets and parent pointers.
+use cstree::{ + interning::{Reader, Resolver}, + GreenNode, +}; + +/// You can construct GreenNodes by hand, but a builder +/// is helpful for top-down parsers: it maintains a stack +/// of currently in-progress nodes +use cstree::GreenNodeBuilder; + +/// The parse results are stored as a "green tree". +/// We'll discuss working with the results later +struct Parse { + green_node: GreenNode, + resolver: I, + #[allow(unused)] + errors: Vec, +} + +/// Now, let's write a parser. +/// Note that `parse` does not return a `Result`: +/// by design, syntax tree can be built even for +/// completely invalid source code. +fn parse(text: &str) -> Parse { + struct Parser<'input> { + /// input tokens, including whitespace, + /// in *reverse* order. + tokens: Vec<(SyntaxKind, &'input str)>, + /// the in-progress tree. + builder: GreenNodeBuilder<'static>, + /// the list of syntax errors we've accumulated + /// so far. + errors: Vec, + } + + /// The outcome of parsing a single S-expression + enum SexpRes { + /// An S-expression (i.e. an atom, or a list) was successfully parsed + Ok, + /// Nothing was parsed, as no significant tokens remained + Eof, + /// An unexpected ')' was found + RParen, + } + + impl Parser<'_> { + fn parse(mut self) -> Parse { + // Make sure that the root node covers all source + self.builder.start_node(ROOT.into()); + // Parse zero or more S-expressions + loop { + match self.sexp() { + SexpRes::Eof => break, + SexpRes::RParen => { + self.builder.start_node(ERROR.into()); + self.errors.push("unmatched `)`".to_string()); + self.bump(); // be sure to chug along in case of error + self.builder.finish_node(); + } + SexpRes::Ok => (), + } + } + // Don't forget to eat *trailing* whitespace + self.skip_ws(); + // Close the root node. + self.builder.finish_node(); + + // Turn the builder into a GreenNode + let (tree, resolver) = self.builder.finish(); + Parse { + green_node: tree, + resolver: resolver.unwrap().into_resolver(), + errors: self.errors, + } + } + + fn list(&mut self) { + assert_eq!(self.current(), Some(L_PAREN)); + // Start the list node + self.builder.start_node(LIST.into()); + self.bump(); // '(' + loop { + match self.sexp() { + SexpRes::Eof => { + self.errors.push("expected `)`".to_string()); + break; + } + SexpRes::RParen => { + self.bump(); + break; + } + SexpRes::Ok => (), + } + } + // close the list node + self.builder.finish_node(); + } + + fn sexp(&mut self) -> SexpRes { + // Eat leading whitespace + self.skip_ws(); + // Either a list, an atom, a closing paren, + // or an eof. + let t = match self.current() { + None => return SexpRes::Eof, + Some(R_PAREN) => return SexpRes::RParen, + Some(t) => t, + }; + match t { + L_PAREN => self.list(), + WORD => { + self.builder.start_node(ATOM.into()); + self.bump(); + self.builder.finish_node(); + } + ERROR => self.bump(), + _ => unreachable!(), + } + SexpRes::Ok + } + + /// Advance one token, adding it to the current branch of the tree builder. + fn bump(&mut self) { + let (kind, text) = self.tokens.pop().unwrap(); + self.builder.token(kind.into(), text); + } + + /// Peek at the first unprocessed token + fn current(&self) -> Option { + self.tokens.last().map(|(kind, _)| *kind) + } + + fn skip_ws(&mut self) { + while self.current() == Some(WHITESPACE) { + self.bump() + } + } + } + + let mut tokens = lex(text); + tokens.reverse(); + Parser { + tokens, + builder: GreenNodeBuilder::new(), + errors: Vec::new(), + } + .parse() +} + +/// To work with the parse results we need a view into the +/// green tree - the Syntax tree. 
+/// It is also immutable, like a GreenNode, +/// but it contains parent pointers, offsets, and +/// has identity semantics. + +type SyntaxNode = cstree::SyntaxNode; +#[allow(unused)] +type SyntaxToken = cstree::SyntaxToken; +#[allow(unused)] +type SyntaxElement = cstree::NodeOrToken; + +impl Parse { + fn syntax(&self) -> SyntaxNode { + SyntaxNode::new_root(self.green_node.clone()) + } +} + +/// Let's check that the parser works as expected +#[test] +fn test_parser() { + let text = "(+ (* 15 2) 62)"; + let parse = parse(text); + let node = parse.syntax(); + let resolver = &parse.resolver; + assert_eq!( + node.debug(resolver, false), + "ROOT@0..15", // root node, spanning 15 bytes + ); + assert_eq!(node.children().count(), 1); + let list = node.children().next().unwrap(); + let children = list + .children_with_tokens() + .map(|child| format!("{:?}@{:?}", child.kind(), child.text_range())) + .collect::>(); + + assert_eq!( + children, + vec![ + "L_PAREN@0..1".to_string(), + "ATOM@1..2".to_string(), + "WHITESPACE@2..3".to_string(), // note, explicit whitespace! + "LIST@3..11".to_string(), + "WHITESPACE@11..12".to_string(), + "ATOM@12..14".to_string(), + "R_PAREN@14..15".to_string(), + ] + ); +} + +/// So far, we've been working with a homogeneous untyped tree. +/// It's nice to provide generic tree operations, like traversals, +/// but it's a bad fit for semantic analysis. +/// This crate itself does not provide AST facilities directly, +/// but it is possible to layer AST on top of `SyntaxNode` API. +/// Let's write a function to evaluate S-expression. +/// +/// For that, let's define AST nodes. +/// It'll be quite a bunch of repetitive code, so we'll use a macro. +/// +/// For a real language, you'd want to generate an AST. I find a +/// combination of `serde`, `ron` and `tera` crates invaluable for that! +macro_rules! ast_node { + ($ast:ident, $kind:ident) => { + #[derive(PartialEq, Eq, Hash)] + #[repr(transparent)] + struct $ast(SyntaxNode); + impl $ast { + #[allow(unused)] + fn cast(node: SyntaxNode) -> Option { + if node.kind() == $kind { + Some(Self(node)) + } else { + None + } + } + } + }; +} + +ast_node!(Root, ROOT); +ast_node!(Atom, ATOM); +ast_node!(List, LIST); + +// Sexp is slightly different, so let's do it by hand. +#[derive(PartialEq, Eq, Hash)] +#[repr(transparent)] +struct Sexp(SyntaxNode); + +enum SexpKind { + Atom(Atom), + List(List), +} + +impl Sexp { + fn cast(node: SyntaxNode) -> Option { + if Atom::cast(node.clone()).is_some() || List::cast(node.clone()).is_some() { + Some(Sexp(node)) + } else { + None + } + } + + fn kind(&self) -> SexpKind { + Atom::cast(self.0.clone()) + .map(SexpKind::Atom) + .or_else(|| List::cast(self.0.clone()).map(SexpKind::List)) + .unwrap() + } +} + +// Let's enhance AST nodes with ancillary functions and +// eval. 
+impl Root { + fn sexps(&self) -> impl Iterator + '_ { + self.0.children().cloned().filter_map(Sexp::cast) + } +} + +enum Op { + Add, + Sub, + Div, + Mul, +} + +impl Atom { + fn eval(&self, resolver: &impl Resolver) -> Option { + self.text(resolver).parse().ok() + } + + fn as_op(&self, resolver: &impl Resolver) -> Option { + let op = match self.text(resolver) { + "+" => Op::Add, + "-" => Op::Sub, + "*" => Op::Mul, + "/" => Op::Div, + _ => return None, + }; + Some(op) + } + + fn text<'r>(&self, resolver: &'r impl Resolver) -> &'r str { + match &self.0.green().children().next() { + Some(cstree::NodeOrToken::Token(token)) => token.text(resolver), + _ => unreachable!(), + } + } +} + +impl List { + fn sexps(&self) -> impl Iterator + '_ { + self.0.children().cloned().filter_map(Sexp::cast) + } + + fn eval(&self, resolver: &impl Resolver) -> Option { + let op = match self.sexps().nth(0)?.kind() { + SexpKind::Atom(atom) => atom.as_op(resolver)?, + _ => return None, + }; + let arg1 = self.sexps().nth(1)?.eval(resolver)?; + let arg2 = self.sexps().nth(2)?.eval(resolver)?; + let res = match op { + Op::Add => arg1 + arg2, + Op::Sub => arg1 - arg2, + Op::Mul => arg1 * arg2, + Op::Div if arg2 == 0 => return None, + Op::Div => arg1 / arg2, + }; + Some(res) + } +} + +impl Sexp { + fn eval(&self, resolver: &impl Resolver) -> Option { + match self.kind() { + SexpKind::Atom(atom) => atom.eval(resolver), + SexpKind::List(list) => list.eval(resolver), + } + } +} + +impl Parse { + fn root(&self) -> Root { + Root::cast(self.syntax()).unwrap() + } +} + +/// Let's test the eval! +fn main() { + let sexps = " +92 +(+ 62 30) +(/ 92 0) +nan +(+ (* 15 2) 62) +"; + let parse = parse(sexps); + let root = parse.root(); + let resolver = &parse.resolver; + let res = root.sexps().map(|it| it.eval(resolver)).collect::>(); + eprintln!("{:?}", res); + assert_eq!(res, vec![Some(92), Some(92), None, None, Some(92),]) +} + +/// Split the input string into a flat list of tokens +/// (such as L_PAREN, WORD, and WHITESPACE) +fn lex(text: &str) -> Vec<(SyntaxKind, &str)> { + fn tok(t: SyntaxKind) -> m_lexer::TokenKind { + m_lexer::TokenKind(cstree::SyntaxKind::from(t).0) + } + fn kind(t: m_lexer::TokenKind) -> SyntaxKind { + match t.0 { + 0 => L_PAREN, + 1 => R_PAREN, + 2 => WORD, + 3 => WHITESPACE, + 4 => ERROR, + _ => unreachable!(), + } + } + + let lexer = m_lexer::LexerBuilder::new() + .error_token(tok(ERROR)) + .tokens(&[ + (tok(L_PAREN), r"\("), + (tok(R_PAREN), r"\)"), + (tok(WORD), r"[^\s()]+"), + (tok(WHITESPACE), r"\s+"), + ]) + .build(); + + lexer + .tokenize(text) + .into_iter() + .map(|t| (t.len, kind(t.kind))) + .scan(0usize, |start_offset, (len, kind)| { + let s = &text[*start_offset..*start_offset + len]; + *start_offset += len; + Some((kind, s)) + }) + .collect() +} diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..4891004 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,21 @@ +unstable_features = true + +edition = "2018" + +max_width = 120 +comment_width = 120 +wrap_comments = true + +format_code_in_doc_comments = true +format_macro_matchers = true + +merge_imports = true + +reorder_impl_items = true + +use_field_init_shorthand = true + +# should be 1, but as of writing is too unstable and introduces blank lines at the start of random blocks +blank_lines_lower_bound = 0 + +struct_field_align_threshold = 8 diff --git a/src/green.rs b/src/green.rs new file mode 100644 index 0000000..6b004ad --- /dev/null +++ b/src/green.rs @@ -0,0 +1,41 @@ +mod node; +mod token; +mod element; +mod builder; + 
+pub(crate) use self::element::GreenElementRef;
+use self::element::{GreenElement, PackedGreenElement};
+
+pub use self::{
+    builder::{Checkpoint, GreenNodeBuilder, NodeCache},
+    node::{Children, GreenNode},
+    token::GreenToken,
+};
+
+/// SyntaxKind is a type tag for each token or node.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SyntaxKind(pub u16);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn assert_send_sync() {
+        fn f<T: Send + Sync>() {}
+        f::<GreenNode>();
+        f::<GreenToken>();
+        f::<GreenElement>();
+        f::<PackedGreenElement>();
+    }
+
+    #[test]
+    fn test_size_of() {
+        use std::mem::size_of;
+
+        eprintln!("GreenNode {}", size_of::<GreenNode>());
+        eprintln!("GreenToken {}", size_of::<GreenToken>());
+        eprintln!("GreenElement {}", size_of::<GreenElement>());
+        eprintln!("PackedGreenElement {}", size_of::<PackedGreenElement>());
+    }
+}
diff --git a/src/green/builder.rs b/src/green/builder.rs
new file mode 100644
index 0000000..8e23220
--- /dev/null
+++ b/src/green/builder.rs
@@ -0,0 +1,225 @@
+use std::{convert::TryFrom, num::NonZeroUsize};
+
+use fxhash::{FxBuildHasher, FxHashMap};
+use lasso::{Capacity, Rodeo, Spur};
+use smallvec::SmallVec;
+use text_size::TextSize;
+
+use crate::{
+    green::{GreenElement, GreenNode, GreenToken, SyntaxKind},
+    interning::Interner,
+    NodeOrToken,
+};
+
+use super::{node::GreenNodeHead, token::GreenTokenData};
+
+#[derive(Debug)]
+pub struct NodeCache {
+    nodes:    FxHashMap<GreenNodeHead, GreenNode>,
+    tokens:   FxHashMap<GreenTokenData, GreenToken>,
+    interner: Rodeo<Spur, FxBuildHasher>,
+}
+
+impl NodeCache {
+    pub fn new() -> Self {
+        Self {
+            nodes:    FxHashMap::default(),
+            tokens:   FxHashMap::default(),
+            interner: Rodeo::with_capacity_and_hasher(
+                // capacity values suggested by the author of `lasso`
+                Capacity::new(512, unsafe { NonZeroUsize::new_unchecked(4096) }),
+                FxBuildHasher::default(),
+            ),
+        }
+    }
+
+    fn node<I>(&mut self, kind: SyntaxKind, children: I) -> GreenNode
+    where
+        I: IntoIterator<Item = GreenElement>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        let children = children.into_iter();
+        // Green nodes are fully immutable, so it's ok to deduplicate them.
+        // This is the same optimization that Roslyn does
+        // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
+        //
+        // For example, all `#[inline]` in this file share the same green node!
+        // For `libsyntax/parse/parser.rs`, measurements show that deduping saves
+        // 17% of the memory for green nodes!
+        if children.len() <= 3 {
+            let children: SmallVec<[_; 3]> = children.collect();
+            let head = GreenNodeHead::from_child_slice(kind, children.as_ref());
+            self.nodes
+                .entry(head.clone())
+                .or_insert_with(|| GreenNode::from_head_and_children(head, children))
+                .clone()
+        } else {
+            GreenNode::new(kind, children)
+        }
+    }
+
+    fn token(&mut self, kind: SyntaxKind, text: &str) -> GreenToken {
+        let text_len = TextSize::try_from(text.len()).unwrap();
+        let text = self.interner.get_or_intern(text);
+        let data = GreenTokenData { kind, text, text_len };
+        self.tokens
+            .entry(data.clone())
+            .or_insert_with(|| GreenToken::new(data))
+            .clone()
+    }
+}
+
+#[derive(Debug)]
+enum MaybeOwned<'a, T> {
+    Owned(T),
+    Borrowed(&'a mut T),
+}
+
+impl<T> std::ops::Deref for MaybeOwned<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        match self {
+            MaybeOwned::Owned(it) => it,
+            MaybeOwned::Borrowed(it) => *it,
+        }
+    }
+}
+
+impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
+    fn deref_mut(&mut self) -> &mut T {
+        match self {
+            MaybeOwned::Owned(it) => it,
+            MaybeOwned::Borrowed(it) => *it,
+        }
+    }
+}
+
+impl<T: Default> Default for MaybeOwned<'_, T> {
+    fn default() -> Self {
+        MaybeOwned::Owned(T::default())
+    }
+}
+
+/// A checkpoint for maybe wrapping a node. See `GreenNodeBuilder::checkpoint` for details.
+#[derive(Clone, Copy, Debug)]
+pub struct Checkpoint(usize);
+
+/// A builder for a green tree.
+#[derive(Debug)]
+pub struct GreenNodeBuilder<'cache> {
+    cache:    MaybeOwned<'cache, NodeCache>,
+    parents:  Vec<(SyntaxKind, usize)>,
+    children: Vec<GreenElement>,
+}
+
+impl GreenNodeBuilder<'_> {
+    /// Creates a new builder.
+    pub fn new() -> GreenNodeBuilder<'static> {
+        GreenNodeBuilder {
+            cache:    MaybeOwned::Owned(NodeCache::new()),
+            parents:  Vec::with_capacity(8),
+            children: Vec::with_capacity(8),
+        }
+    }
+
+    /// Reusing a `NodeCache` between different `GreenNodeBuilder`s saves memory.
+    /// It allows the underlying trees to be structurally shared.
+    pub fn with_cache(cache: &mut NodeCache) -> GreenNodeBuilder<'_> {
+        GreenNodeBuilder {
+            cache:    MaybeOwned::Borrowed(cache),
+            parents:  Vec::with_capacity(8),
+            children: Vec::with_capacity(8),
+        }
+    }
+
+    /// Adds a new token to the current branch.
+    #[inline]
+    pub fn token(&mut self, kind: SyntaxKind, text: &str) {
+        let token = self.cache.token(kind, text);
+        self.children.push(token.into());
+    }
+
+    /// Starts a new node and makes it current.
+    #[inline]
+    pub fn start_node(&mut self, kind: SyntaxKind) {
+        let len = self.children.len();
+        self.parents.push((kind, len));
+    }
+
+    /// Finishes the current branch and restores the previous
+    /// branch as current.
+    #[inline]
+    pub fn finish_node(&mut self) {
+        let (kind, first_child) = self.parents.pop().unwrap();
+        let children = self.children.drain(first_child..);
+        let node = self.cache.node(kind, children);
+        self.children.push(node.into());
+    }
+
+    /// Prepares for maybe wrapping the next node.
+    /// The way wrapping works is that you first get a checkpoint,
+    /// then you place all tokens you want to wrap, and then *maybe* call
+    /// `start_node_at`.
+    /// Example:
+    /// ```rust
+    /// # use cstree::{GreenNodeBuilder, SyntaxKind};
+    /// # const PLUS: SyntaxKind = SyntaxKind(0);
+    /// # const OPERATION: SyntaxKind = SyntaxKind(1);
+    /// # struct Parser;
+    /// # impl Parser {
+    /// #     fn peek(&self) -> Option<SyntaxKind> { None }
+    /// #     fn parse_expr(&mut self) {}
+    /// # }
+    /// # let mut builder = GreenNodeBuilder::new();
+    /// # let mut parser = Parser;
+    /// let checkpoint = builder.checkpoint();
+    /// parser.parse_expr();
+    /// if parser.peek() == Some(PLUS) {
+    ///     // 1 + 2 = Add(1, 2)
+    ///     builder.start_node_at(checkpoint, OPERATION);
+    ///     parser.parse_expr();
+    ///     builder.finish_node();
+    /// }
+    /// ```
+    #[inline]
+    pub fn checkpoint(&self) -> Checkpoint {
+        Checkpoint(self.children.len())
+    }
+
+    /// Wraps the previous branch marked by `checkpoint` in a new branch and
+    /// makes it current.
+    #[inline]
+    pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
+        let Checkpoint(checkpoint) = checkpoint;
+        assert!(
+            checkpoint <= self.children.len(),
+            "checkpoint no longer valid, was finish_node called early?"
+        );
+
+        if let Some(&(_, first_child)) = self.parents.last() {
+            assert!(
+                checkpoint >= first_child,
+                "checkpoint no longer valid, was an unmatched start_node_at called?"
+            );
+        }
+
+        self.parents.push((kind, checkpoint));
+    }
+
+    /// Completes tree building. Make sure that
+    /// `start_node_at` and `finish_node` calls
+    /// are paired!
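+    ///
+    /// For example, a minimal sketch (the kinds here are arbitrary):
+    /// ```rust
+    /// # use cstree::{GreenNodeBuilder, SyntaxKind};
+    /// # const ROOT: SyntaxKind = SyntaxKind(0);
+    /// # const WORD: SyntaxKind = SyntaxKind(1);
+    /// let mut builder = GreenNodeBuilder::new();
+    /// builder.start_node(ROOT);
+    /// builder.token(WORD, "hi");
+    /// builder.finish_node();
+    /// let (tree, interner) = builder.finish();
+    /// assert_eq!(tree.kind(), ROOT);
+    /// // `interner` is `Some` because this builder owned its `NodeCache`;
+    /// // builders created via `with_cache` return `None` here.
+    /// assert!(interner.is_some());
+    /// ```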
+    #[inline]
+    pub fn finish(mut self) -> (GreenNode, Option<Rodeo<Spur, FxBuildHasher>>) {
+        assert_eq!(self.children.len(), 1);
+        let resolver = match self.cache {
+            MaybeOwned::Owned(cache) => Some(cache.interner),
+            MaybeOwned::Borrowed(_) => None,
+        };
+        match self.children.pop().unwrap() {
+            NodeOrToken::Node(node) => (node, resolver),
+            NodeOrToken::Token(_) => panic!(),
+        }
+    }
+}
diff --git a/src/green/element.rs b/src/green/element.rs
new file mode 100644
index 0000000..2ed2fca
--- /dev/null
+++ b/src/green/element.rs
@@ -0,0 +1,212 @@
+use std::{fmt, hash, mem};
+
+// NOTE: From `thin_dst`:
+// This MUST be size=1 such that pointer math actually advances the pointer.
+type ErasedPtr = *const u8;
+
+use crate::{
+    green::{GreenNode, GreenToken, SyntaxKind},
+    NodeOrToken, TextSize,
+};
+
+pub(super) type GreenElement = NodeOrToken<GreenNode, GreenToken>;
+pub(crate) type GreenElementRef<'a> = NodeOrToken<&'a GreenNode, &'a GreenToken>;
+
+#[repr(transparent)]
+pub(super) struct PackedGreenElement {
+    ptr: ErasedPtr,
+}
+
+impl From<GreenNode> for GreenElement {
+    #[inline]
+    fn from(node: GreenNode) -> GreenElement {
+        NodeOrToken::Node(node)
+    }
+}
+
+impl<'a> From<&'a GreenNode> for GreenElementRef<'a> {
+    #[inline]
+    fn from(node: &'a GreenNode) -> GreenElementRef<'a> {
+        NodeOrToken::Node(node)
+    }
+}
+
+impl From<GreenNode> for PackedGreenElement {
+    #[inline]
+    fn from(node: GreenNode) -> PackedGreenElement {
+        unsafe { mem::transmute(node) }
+    }
+}
+
+impl From<GreenToken> for GreenElement {
+    #[inline]
+    fn from(token: GreenToken) -> GreenElement {
+        NodeOrToken::Token(token)
+    }
+}
+
+impl<'a> From<&'a GreenToken> for GreenElementRef<'a> {
+    #[inline]
+    fn from(token: &'a GreenToken) -> GreenElementRef<'a> {
+        NodeOrToken::Token(token)
+    }
+}
+
+impl From<GreenToken> for PackedGreenElement {
+    #[inline]
+    fn from(token: GreenToken) -> PackedGreenElement {
+        unsafe { mem::transmute(token) }
+    }
+}
+
+impl GreenElement {
+    /// Returns the kind of this element.
+    #[inline]
+    pub fn kind(&self) -> SyntaxKind {
+        self.as_ref().kind()
+    }
+
+    /// Returns the length of the text covered by this element.
+    #[inline]
+    pub fn text_len(&self) -> TextSize {
+        self.as_ref().text_len()
+    }
+}
+
+impl GreenElementRef<'_> {
+    /// Returns the kind of this element.
+    #[inline]
+    pub fn kind(&self) -> SyntaxKind {
+        match self {
+            NodeOrToken::Node(it) => it.kind(),
+            NodeOrToken::Token(it) => it.kind(),
+        }
+    }
+
+    /// Returns the length of the text covered by this element.
+ #[inline] + pub fn text_len(self) -> TextSize { + match self { + NodeOrToken::Node(it) => it.text_len(), + NodeOrToken::Token(it) => it.text_len(), + } + } +} + +impl From for PackedGreenElement { + fn from(element: GreenElement) -> Self { + match element { + NodeOrToken::Node(node) => node.into(), + NodeOrToken::Token(token) => token.into(), + } + } +} + +impl From for GreenElement { + fn from(element: PackedGreenElement) -> Self { + if element.is_node() { + NodeOrToken::Node(element.into_node().unwrap()) + } else { + NodeOrToken::Token(element.into_token().unwrap()) + } + } +} + +impl PackedGreenElement { + fn is_node(&self) -> bool { + self.ptr as usize & 1 == 0 + } + + pub(crate) fn as_node(&self) -> Option<&GreenNode> { + if self.is_node() { + unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenNode)) } + } else { + None + } + } + + pub(crate) fn into_node(self) -> Option { + if self.is_node() { + unsafe { Some(mem::transmute(self)) } + } else { + None + } + } + + pub(crate) fn as_token(&self) -> Option<&GreenToken> { + if !self.is_node() { + unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenToken)) } + } else { + None + } + } + + pub(crate) fn into_token(self) -> Option { + if !self.is_node() { + unsafe { Some(mem::transmute(self)) } + } else { + None + } + } + + pub(crate) fn as_ref(&self) -> GreenElementRef<'_> { + if self.is_node() { + NodeOrToken::Node(self.as_node().unwrap()) + } else { + NodeOrToken::Token(self.as_token().unwrap()) + } + } +} + +impl fmt::Debug for PackedGreenElement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_node() { + self.as_node().unwrap().fmt(f) + } else { + self.as_token().unwrap().fmt(f) + } + } +} + +impl Eq for PackedGreenElement {} +impl PartialEq for PackedGreenElement { + fn eq(&self, other: &Self) -> bool { + self.as_node() == other.as_node() && self.as_token() == other.as_token() + } +} + +impl hash::Hash for PackedGreenElement { + fn hash(&self, state: &mut H) + where + H: hash::Hasher, + { + if self.is_node() { + self.as_node().unwrap().hash(state) + } else { + self.as_token().unwrap().hash(state) + } + } +} + +impl Drop for PackedGreenElement { + fn drop(&mut self) { + if self.is_node() { + PackedGreenElement { ptr: self.ptr }.into_node(); + } else { + PackedGreenElement { ptr: self.ptr }.into_token(); + } + } +} + +unsafe impl Send for PackedGreenElement +where + GreenToken: Send, + GreenNode: Send, +{ +} +unsafe impl Sync for PackedGreenElement +where + GreenToken: Sync, + GreenNode: Sync, +{ +} diff --git a/src/green/node.rs b/src/green/node.rs new file mode 100644 index 0000000..11bf5c8 --- /dev/null +++ b/src/green/node.rs @@ -0,0 +1,220 @@ +use std::{ + hash::{Hash, Hasher}, + iter::FusedIterator, + slice, +}; + +use fxhash::FxHasher32; +use servo_arc::{Arc, HeaderSlice, HeaderWithLength, ThinArc}; + +use crate::{ + green::{GreenElement, GreenElementRef, PackedGreenElement, SyntaxKind}, + TextSize, +}; + +#[repr(align(2))] // NB: this is an at-least annotation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(super) struct GreenNodeHead { + kind: SyntaxKind, + text_len: TextSize, + child_hash: u32, +} + +impl GreenNodeHead { + #[inline] + pub(super) fn from_child_slice(kind: SyntaxKind, children: &[GreenElement]) -> Self { + let mut hasher = FxHasher32::default(); + let mut text_len: TextSize = 0.into(); + for child in children { + text_len += child.text_len(); + child.hash(&mut hasher); + } + Self { + kind, + text_len, + child_hash: hasher.finish() as u32, + } + } +} + +/// 
Internal node in the immutable tree. +/// It has other nodes and tokens as children. +#[derive(Clone, PartialEq, Eq)] +pub struct GreenNode { + pub(super) data: ThinArc, +} + +impl std::fmt::Debug for GreenNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.with_arc(|data| data.fmt(f)) + } +} + +impl GreenNode { + /// Creates new Node. + #[inline] + pub fn new(kind: SyntaxKind, children: I) -> GreenNode + where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, + { + let mut hasher = FxHasher32::default(); + let mut text_len: TextSize = 0.into(); + let children = children + .into_iter() + .inspect(|it| { + text_len += it.text_len(); + it.hash(&mut hasher); + }) + .map(PackedGreenElement::from); + let header = HeaderWithLength::new( + GreenNodeHead { + kind, + text_len: 0.into(), + child_hash: 0, + }, + children.len(), + ); + let mut data = Arc::from_header_and_iter(header, children); + + // XXX: fixup `text_len` and `child_hash` after construction, because + // we can't iterate `children` twice. + let header = &mut Arc::get_mut(&mut data).unwrap().header.header; + header.text_len = text_len; + header.child_hash = hasher.finish() as u32; + GreenNode { + data: Arc::into_thin(data), + } + } + + #[inline] + pub(super) fn from_head_and_children(header: GreenNodeHead, children: I) -> GreenNode + where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, + { + let children = children.into_iter().map(PackedGreenElement::from); + let header = HeaderWithLength::new(header, children.len()); + GreenNode { + data: Arc::into_thin(Arc::from_header_and_iter(header, children)), + } + } + + /// Kind of this node. + #[inline] + pub fn kind(&self) -> SyntaxKind { + self.data.header.header.kind + } + + /// Returns the length of the text covered by this node. + #[inline] + pub fn text_len(&self) -> TextSize { + self.data.header.header.text_len + } + + /// Children of this node. 
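+    ///
+    /// A sketch of how the returned iterator might be used (`node` is a
+    /// hypothetical `GreenNode`); the children's text lengths always sum to
+    /// the parent's:
+    /// ```rust,ignore
+    /// let total: u32 = node.children().map(|child| u32::from(child.text_len())).sum();
+    /// assert_eq!(total, u32::from(node.text_len()));
+    /// ```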
+ #[inline] + pub fn children(&self) -> Children<'_> { + Children { + inner: self.data.slice.iter(), + } + } + + pub(crate) fn ptr(&self) -> *const u8 { + let r: &HeaderSlice<_, _> = &self.data; + r as *const _ as _ + } +} + +impl Hash for GreenNode { + #[inline] + fn hash(&self, state: &mut H) { + self.data.header.header.hash(state); + } +} + +#[derive(Debug, Clone)] +pub struct Children<'a> { + inner: slice::Iter<'a, PackedGreenElement>, +} + +// NB: forward everything stable that iter::Slice specializes as of Rust 1.39.0 +impl ExactSizeIterator for Children<'_> { + #[inline(always)] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a> Iterator for Children<'a> { + type Item = GreenElementRef<'a>; + + #[inline] + fn next(&mut self) -> Option> { + self.inner.next().map(PackedGreenElement::as_ref) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } + + #[inline] + fn count(self) -> usize + where + Self: Sized, + { + self.inner.count() + } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + self.inner.nth(n).map(PackedGreenElement::as_ref) + } + + #[inline] + fn last(mut self) -> Option + where + Self: Sized, + { + self.next_back() + } + + #[inline] + fn fold(mut self, init: Acc, mut f: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + let mut accum = init; + while let Some(x) = self.next() { + accum = f(accum, x); + } + accum + } +} + +impl<'a> DoubleEndedIterator for Children<'a> { + #[inline] + fn next_back(&mut self) -> Option { + self.inner.next_back().map(PackedGreenElement::as_ref) + } + + #[inline] + fn nth_back(&mut self, n: usize) -> Option { + self.inner.nth_back(n).map(PackedGreenElement::as_ref) + } + + #[inline] + fn rfold(mut self, init: Acc, mut f: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + let mut accum = init; + while let Some(x) = self.next_back() { + accum = f(accum, x); + } + accum + } +} + +impl FusedIterator for Children<'_> {} diff --git a/src/green/token.rs b/src/green/token.rs new file mode 100644 index 0000000..0fda7e5 --- /dev/null +++ b/src/green/token.rs @@ -0,0 +1,120 @@ +use servo_arc::Arc; +use std::{fmt, hash, mem::ManuallyDrop, ptr}; + +use crate::{green::SyntaxKind, interning::Resolver, TextSize}; +use lasso::Spur; + +#[repr(align(2))] // NB: this is an at-least annotation +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub struct GreenTokenData { + pub kind: SyntaxKind, + pub text: Spur, + pub text_len: TextSize, +} + +/// Leaf node in the immutable tree. +pub struct GreenToken { + ptr: ptr::NonNull, +} + +unsafe impl Send for GreenToken {} // where GreenTokenData: Send + Sync +unsafe impl Sync for GreenToken {} // where GreenTokenData: Send + Sync + +impl GreenToken { + fn add_tag(ptr: ptr::NonNull) -> ptr::NonNull { + unsafe { + let ptr = ((ptr.as_ptr() as usize) | 1) as *mut GreenTokenData; + ptr::NonNull::new_unchecked(ptr) + } + } + + fn remove_tag(ptr: ptr::NonNull) -> ptr::NonNull { + unsafe { + let ptr = ((ptr.as_ptr() as usize) & !1) as *mut GreenTokenData; + ptr::NonNull::new_unchecked(ptr) + } + } + + fn data(&self) -> &GreenTokenData { + unsafe { &*Self::remove_tag(self.ptr).as_ptr() } + } + + /// Creates new Token. + #[inline] + pub fn new(data: GreenTokenData) -> GreenToken { + let ptr = Arc::into_raw(Arc::new(data)); + let ptr = ptr::NonNull::new(ptr as *mut _).unwrap(); + GreenToken { + ptr: Self::add_tag(ptr), + } + } + + /// Kind of this Token. 
+ #[inline] + pub fn kind(&self) -> SyntaxKind { + self.data().kind + } + + /// Text of this Token. + #[inline] + pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str + where + I: Resolver + ?Sized, + { + resolver.resolve(&self.data().text) + } + + /// Returns the length of the text covered by this token. + #[inline] + pub fn text_len(&self) -> TextSize { + self.data().text_len + } +} + +impl fmt::Debug for GreenToken { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let data = self.data(); + f.debug_struct("GreenToken") + .field("kind", &data.kind) + .field("text", &data.text) + .finish() + } +} + +impl Clone for GreenToken { + fn clone(&self) -> Self { + let ptr = Self::remove_tag(self.ptr); + let ptr = unsafe { + let arc = ManuallyDrop::new(Arc::from_raw(ptr.as_ptr())); + Arc::into_raw(Arc::clone(&arc)) + }; + let ptr = ptr::NonNull::new(ptr as *mut _).unwrap(); + GreenToken { + ptr: Self::add_tag(ptr), + } + } +} + +impl Eq for GreenToken {} +impl PartialEq for GreenToken { + fn eq(&self, other: &Self) -> bool { + self.data() == other.data() + } +} + +impl hash::Hash for GreenToken { + fn hash(&self, state: &mut H) + where + H: hash::Hasher, + { + self.data().hash(state) + } +} + +impl Drop for GreenToken { + fn drop(&mut self) { + unsafe { + Arc::from_raw(Self::remove_tag(self.ptr).as_ptr()); + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..978cc71 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,67 @@ +//! `cstree` is a generic library for creating and working with concrete syntax trees. +//! The concept of CSTs is inspired in part by Swift's +//! [libsyntax](https://github.com/apple/swift/tree/5e2c815edfd758f9b1309ce07bfc01c4bc20ec23/lib/Syntax). +//! +//! The `cstree` implementation is a fork of the excellent +//! [`rowan`](https://github.com/rust-analyzer/rowan/), developed by the authors of +//! [rust-analyzer](https://github.com/rust-analyzer/rust-analyzer/). +//! While we are building our own documentation, a conceptual overview of their implementation is +//! available in the [rust-analyzer +//! repo](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#trees). +//! +//! Notable differences of `cstree` compared to `rowan`: +//! - Syntax trees (red trees) are created lazily, but are persistent. Once a node has been created, +//! it will remain allocated, while `rowan` re-creates the red layer on the fly. Apart from the +//! trade-off discussed +//! [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#memoized-rednodes), +//! this helps to achieve good tree traversal speed while providing the next points: +//! - Syntax (red) nodes are `Send` and `Sync`, allowing to share realized trees across threads. This is achieved by +//! atomically reference counting syntax trees as a whole, which also gets rid of the need to reference count +//! individual nodes (helping with the point above). +//! - Syntax nodes can hold custom data. +//! - `cstree` trees are trees over interned strings. This means `cstree` will deduplicate the text +//! of tokens such as identifiers with the same name. In this position, `rowan` stores each string, +//! with a small string optimization (see [`SmolStr`](https://crates.io/crates/smol_str)). +//! - Performance optimizations for tree creation: only allocate new nodes on the heap if they are not in cache, avoid +//! recursively hashing subtrees +//! +//! See `examples/s_expressions.rs` for a tutorial. 
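+//!
+//! A minimal end-to-end sketch of the moving parts (mirroring `tests/common.rs`
+//! and `tests/basic.rs`; `TestLang` is the identity `Language` implementation
+//! from the tests and the `SyntaxKind` values are arbitrary):
+//!
+//! ```ignore
+//! use cstree::{GreenNodeBuilder, SyntaxKind, SyntaxNode};
+//!
+//! let mut builder = GreenNodeBuilder::new();
+//! builder.start_node(SyntaxKind(0));     // root node
+//! builder.token(SyntaxKind(1), "hello"); // a leaf token; its text is interned
+//! builder.finish_node();
+//! let (green, interner) = builder.finish();
+//! let root: SyntaxNode<TestLang> = SyntaxNode::new_root(green);
+//! let resolver = interner.unwrap();
+//! assert_eq!(root.text(&resolver), "hello");
+//! ```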
+#![forbid(
+    // missing_debug_implementations,
+    unconditional_recursion,
+    future_incompatible,
+    // missing_docs,
+)]
+#![deny(unsafe_code)]
+
+#[allow(unsafe_code)]
+mod green;
+#[allow(unsafe_code)]
+pub mod syntax;
+
+#[cfg(feature = "serde1")]
+mod serde_impls;
+mod syntax_text;
+mod utility_types;
+
+pub mod interning {
+    pub use lasso::{Interner, Reader, Resolver};
+}
+use std::fmt;
+
+// Reexport types for working with strings.
+pub use text_size::{TextLen, TextRange, TextSize};
+
+pub use crate::{
+    green::{Checkpoint, Children, GreenNode, GreenNodeBuilder, GreenToken, SyntaxKind},
+    syntax::{SyntaxElement, SyntaxElementChildren, SyntaxElementRef, SyntaxNode, SyntaxNodeChildren, SyntaxToken},
+    syntax_text::SyntaxText,
+    utility_types::{Direction, NodeOrToken, TokenAtOffset, WalkEvent},
+};
+
+/// Bridges the raw, untyped [`SyntaxKind`]s stored in the green tree with a
+/// concrete language's typed notion of syntax kinds.
+pub trait Language: Sized + Clone + Copy + fmt::Debug + Eq + Ord + std::hash::Hash {
+    type Kind: fmt::Debug;
+
+    fn kind_from_raw(raw: SyntaxKind) -> Self::Kind;
+    fn kind_to_raw(kind: Self::Kind) -> SyntaxKind;
+}
diff --git a/src/serde_impls.rs b/src/serde_impls.rs
new file mode 100644
index 0000000..10d7d18
--- /dev/null
+++ b/src/serde_impls.rs
@@ -0,0 +1,66 @@
+use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer};
+use std::fmt;
+
+use crate::{
+    syntax::{SyntaxNode, SyntaxToken},
+    Language, NodeOrToken,
+};
+
+struct SerDisplay<T>(T);
+impl<T: fmt::Display> Serialize for SerDisplay<T> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.collect_str(&self.0)
+    }
+}
+
+struct DisplayDebug<T>(T);
+impl<T: fmt::Debug> fmt::Display for DisplayDebug<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&self.0, f)
+    }
+}
+
+impl<L: Language, D: 'static> Serialize for SyntaxNode<L, D> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut state = serializer.serialize_map(Some(3))?;
+        state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
+        state.serialize_entry("text_range", &self.text_range())?;
+        state.serialize_entry("children", &Children(self))?;
+        state.end()
+    }
+}
+
+impl<L: Language, D: 'static> Serialize for SyntaxToken<L, D> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut state = serializer.serialize_map(Some(3))?;
+        state.serialize_entry("kind", &SerDisplay(DisplayDebug(self.kind())))?;
+        state.serialize_entry("text_range", &self.text_range())?;
+        state.serialize_entry("text", &self.text().as_str())?;
+        state.end()
+    }
+}
+
+struct Children<T>(T);
+
+impl<L: Language, D: 'static> Serialize for Children<&'_ SyntaxNode<L, D>> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut state = serializer.serialize_seq(None)?;
+        self.0.children_with_tokens().try_for_each(|element| match element {
+            NodeOrToken::Node(it) => state.serialize_element(&it),
+            NodeOrToken::Token(it) => state.serialize_element(&it),
+        })?;
+        state.end()
+    }
+}
diff --git a/src/syntax.rs b/src/syntax.rs
new file mode 100644
index 0000000..56ddf9c
--- /dev/null
+++ b/src/syntax.rs
@@ -0,0 +1,1269 @@
+use std::{
+    cell::UnsafeCell,
+    fmt::Write,
+    hash::{Hash, Hasher},
+    iter, ptr,
+    sync::atomic::{AtomicU32, Ordering},
+};
+
+use parking_lot::RwLock;
+use servo_arc::Arc;
+
+use crate::{
+    green::{GreenElementRef, SyntaxKind},
+    interning::Resolver,
+    Children, Direction, GreenNode, GreenToken, Language, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset,
+    WalkEvent,
+};
+
+// A note on `#[inline]` usage in this file:
+// In `rowan`, there are two layers of `SyntaxXY`s: the `cursor` layer and the `api` layer.
+// The `cursor` layer handles all of the actual methods on the tree, while the `api` layer is +// generic over the `Language` of the tree and otherwise forwards its implementation to the `cursor` +// layer. +// Here, we have unified the `cursor` and the `api` layer into the `syntax` layer. +// This means that all of our types here are generic over a `Language`, including the +// implementations which, in `rowan`, are part of the `cursor` layer. +// Very apparently, this makes the compiler less willing to inline. Almost every "regular use" +// method in this file has some kind of `#[inline]` annotation to counteract that. This is _NOT_ +// just for fun, not inlining decreases tree traversal speed by approx. 50% at the time of writing +// this. +// +// - DQ 01/2021 + +#[repr(transparent)] +pub struct SyntaxNode { + data: *mut NodeData, +} + +impl SyntaxNode { + pub fn debug(&self, resolver: &impl Resolver, recursive: bool) -> String { + // NOTE: `fmt::Write` methods on `String` never fail + let mut res = String::new(); + if recursive { + let mut level = 0; + for event in self.preorder_with_tokens() { + match event { + WalkEvent::Enter(element) => { + for _ in 0..level { + write!(res, " ").unwrap(); + } + write!( + res, + "{}\n", + match element { + NodeOrToken::Node(node) => node.debug(resolver, false), + NodeOrToken::Token(token) => token.debug(resolver), + }, + ) + .unwrap(); + level += 1; + } + WalkEvent::Leave(_) => level -= 1, + } + } + assert_eq!(level, 0); + } else { + write!(res, "{:?}@{:?}", self.kind(), self.text_range()).unwrap(); + } + res + } + + pub fn display(&self, resolver: &impl Resolver) -> String { + let mut res = String::new(); + self.preorder_with_tokens() + .filter_map(|event| match event { + WalkEvent::Enter(NodeOrToken::Token(token)) => Some(token), + _ => None, + }) + .try_for_each(|it| write!(res, "{}", it.display(resolver))) + .unwrap(); + res + } +} + +impl Clone for SyntaxNode { + fn clone(&self) -> Self { + // safety:: the ref count is only dropped when there are no more external references (see below) + // since we are currently cloning such a reference, there is still at least one + let ref_count = unsafe { &mut *self.data().ref_count }; + ref_count.fetch_add(1, Ordering::AcqRel); + self.clone_uncounted() + } +} + +impl Drop for SyntaxNode { + fn drop(&mut self) { + // safety:: the ref count is only dropped when there are no more external references (see below) + // and all nodes but the root have been dropped. + // if we are the last external reference, we have not yet dropped the ref count + // if we aren't we won't enter the `if` below + let ref_count = unsafe { &*self.data().ref_count }; + let refs = ref_count.fetch_sub(1, Ordering::AcqRel); + if refs == 1 { + // drop from parent + // NOTE regarding drop orders: since `SyntaxNode::drop` looks at the `ref_count`, we + // need to first drop the `root` and only then its `root_data` and the contained + // `ref_count` + let root = self.root(); + let mut root = root.clone_uncounted(); + let ref_count = unsafe { Box::from_raw(root.data().ref_count) }; + root.drop_recursive(); + let root_data = root.data; + drop(root); + unsafe { drop(Box::from_raw(root_data)) }; + drop(ref_count); + } + } +} + +impl SyntaxNode { + #[inline] + fn data(&self) -> &NodeData { + unsafe { &*self.data } + } + + /// # Safety: + /// Caller must ensure that the access to the underlying data is unique (no active _mutable or immutable_ + /// references). 
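+    ///
+    /// A conforming call pattern is the one in `new_root` below: the node was
+    /// just created and has not been shared yet, so access is necessarily
+    /// unique (sketch):
+    ///
+    /// ```ignore
+    /// let ret = Self::new(data);
+    /// // safety: we have just created `ret` and have not shared it
+    /// unsafe { ret.data_mut() }.green = green;
+    /// ```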
+ #[inline] + unsafe fn data_mut(&self) -> &mut NodeData { + &mut *self.data + } + + #[inline] + fn clone_uncounted(&self) -> Self { + Self { data: self.data } + } + + fn root(&self) -> &SyntaxNode { + let mut current = self; + while let Some(parent) = current.parent() { + current = parent; + } + current + } + + fn drop_recursive(&mut self) { + let data = self.data(); + for i in 0..data.children.len() { + // safety: `child_locks` and `children` are pre-allocated to the same length + let _write = unsafe { data.child_locks.get_unchecked(i).write() }; + // safety: protected by the write lock + let slot = unsafe { &mut *data.children.get_unchecked(i).get() }; + let mut child_data = None; + if let Some(child) = slot { + // Tokens have no children that point to them, so if there are no external pointers + // and the pointer from the parent is dropped they will be dropped. + // Nodes may be pointed to by their children, hence we check them first. + if let NodeOrToken::Node(node) = child { + node.drop_recursive(); + child_data = Some(node.data); + } + } + // if the above `if let` was true, this drops `child` + *slot = None; + if let Some(data) = child_data { + // the current `slot` contained a child, which was a node with `data` + + // safety: since there are no more `parent` pointers from the children of the + // node this data belonged to, and we have just dropped the node, there are now + // no more references to `data` + let data = unsafe { Box::from_raw(data) }; + drop(data); + } + } + } +} + +// Identity semantics for hash & eq +impl PartialEq for SyntaxNode { + fn eq(&self, other: &SyntaxNode) -> bool { + self.green().ptr() == other.green().ptr() && self.text_range().start() == other.text_range().start() + } +} + +impl Eq for SyntaxNode {} + +impl Hash for SyntaxNode { + fn hash(&self, state: &mut H) { + ptr::hash(self.green().ptr(), state); + self.text_range().start().hash(state); + } +} + +pub struct SyntaxToken { + parent: SyntaxNode, + index: u32, + offset: TextSize, +} + +impl Clone for SyntaxToken { + fn clone(&self) -> Self { + Self { + parent: self.parent.clone(), + index: self.index, + offset: self.offset, + } + } +} + +impl Hash for SyntaxToken { + fn hash(&self, state: &mut H) { + self.parent.hash(state); + self.index.hash(state); + self.offset.hash(state); + } +} + +impl PartialEq for SyntaxToken { + fn eq(&self, other: &SyntaxToken) -> bool { + self.parent == other.parent && self.index == other.index && self.offset == other.offset + } +} + +impl Eq for SyntaxToken {} + +impl SyntaxToken { + pub fn debug(&self, resolver: &impl Resolver) -> String { + let mut res = String::new(); + write!(res, "{:?}@{:?}", self.kind(), self.text_range()).unwrap(); + if self.text(resolver).len() < 25 { + write!(res, " {:?}", self.text(resolver)).unwrap(); + return res; + } + let text = self.text(resolver); + for idx in 21..25 { + if text.is_char_boundary(idx) { + let text = format!("{} ...", &text[..idx]); + write!(res, " {:?}", text).unwrap(); + return res; + } + } + unreachable!() + } + + pub fn display(&self, resolver: &impl Resolver) -> String { + self.text(resolver).to_string() + } +} + +pub type SyntaxElement = NodeOrToken, SyntaxToken>; + +impl From> for SyntaxElement { + fn from(node: SyntaxNode) -> SyntaxElement { + NodeOrToken::Node(node) + } +} + +impl From> for SyntaxElement { + fn from(token: SyntaxToken) -> SyntaxElement { + NodeOrToken::Token(token) + } +} + +impl SyntaxElement { + pub fn display(&self, resolver: &impl Resolver) -> String { + match self { + NodeOrToken::Node(it) => 
it.display(resolver), + NodeOrToken::Token(it) => it.display(resolver), + } + } +} + +pub type SyntaxElementRef<'a, L, D = ()> = NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>; + +impl<'a, L: Language, D> From<&'a SyntaxNode> for SyntaxElementRef<'a, L, D> { + fn from(node: &'a SyntaxNode) -> Self { + NodeOrToken::Node(node) + } +} + +impl<'a, L: Language, D> From<&'a SyntaxToken> for SyntaxElementRef<'a, L, D> { + fn from(token: &'a SyntaxToken) -> Self { + NodeOrToken::Token(token) + } +} + +impl<'a, L: Language, D> From<&'a SyntaxElement> for SyntaxElementRef<'a, L, D> { + fn from(element: &'a SyntaxElement) -> Self { + match element { + NodeOrToken::Node(it) => Self::Node(it), + NodeOrToken::Token(it) => Self::Token(it), + } + } +} + +impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> { + pub fn display(&self, resolver: &impl Resolver) -> String { + match self { + NodeOrToken::Node(it) => it.display(resolver), + NodeOrToken::Token(it) => it.display(resolver), + } + } +} + +enum Kind { + Root(GreenNode), + Child { + parent: SyntaxNode, + index: u32, + offset: TextSize, + }, +} + +impl Kind { + fn as_child(&self) -> Option<(&SyntaxNode, u32, TextSize)> { + match self { + Kind::Child { parent, index, offset } => Some((parent, *index, *offset)), + _ => None, + } + } +} + +struct NodeData { + kind: Kind, + green: ptr::NonNull, + ref_count: *mut AtomicU32, + data: RwLock>>, + children: Vec>>>, + child_locks: Vec>, +} + +impl NodeData { + fn new( + kind: Kind, + green: ptr::NonNull, + ref_count: *mut AtomicU32, + n_children: usize, + ) -> *mut Self { + let mut children = Vec::with_capacity(n_children); + let mut child_locks = Vec::with_capacity(n_children); + children.extend((0..n_children).map(|_| Default::default())); + child_locks.extend((0..n_children).map(|_| Default::default())); + Box::into_raw(Box::new(Self { + kind, + green, + ref_count, + data: RwLock::default(), + children, + child_locks, + })) + } +} + +impl SyntaxNode { + fn new(data: *mut NodeData) -> SyntaxNode { + Self { data } + } + + pub fn new_root(green: GreenNode) -> SyntaxNode { + let ref_count = Box::new(AtomicU32::new(1)); + let n_children = green.children().count(); + let data = NodeData::new( + Kind::Root(green), + ptr::NonNull::dangling(), + Box::into_raw(ref_count), + n_children, + ); + let ret = Self::new(data); + let green: ptr::NonNull = match &ret.data().kind { + Kind::Root(green) => green.into(), + _ => unreachable!(), + }; + // safety: we have just created `ret` and have not shared it + unsafe { ret.data_mut() }.green = green; + ret + } + + // Technically, unsafe, but private so that's OK. 
+ // Safety: `green` must be a descendent of `parent.green` + fn new_child( + green: &GreenNode, + parent: &SyntaxNode, + index: u32, + offset: TextSize, + ref_count: *mut AtomicU32, + ) -> SyntaxNode { + let n_children = green.children().count(); + let data = NodeData::new( + Kind::Child { + parent: parent.clone_uncounted(), + index, + offset, + }, + green.into(), + ref_count, + n_children, + ); + Self::new(data) + } + + pub fn set_data(&self, data: D) -> Arc { + let mut ptr = self.data().data.write(); + let data = Arc::new(data); + *ptr = Some(Arc::clone(&data)); + data + } + + pub fn try_set_data(&self, data: D) -> Result, D> { + let mut ptr = self.data().data.write(); + if ptr.is_some() { + return Err(data); + } + let data = Arc::new(data); + *ptr = Some(Arc::clone(&data)); + Ok(data) + } + + pub fn get_data(&self) -> Option> { + let ptr = self.data().data.read(); + (*ptr).as_ref().map(|ptr| Arc::clone(ptr)) + } + + pub fn clear_data(&self) { + let mut ptr = self.data().data.write(); + *ptr = None; + } + + #[inline] + fn read(&self, index: usize) -> Option> { + // safety: children are pre-allocated and indices are determined internally + let _read = unsafe { self.data().child_locks.get_unchecked(index).read() }; + // safety: mutable accesses to the slot only occur below and have to take the lock + let slot = unsafe { &*self.data().children.get_unchecked(index).get() }; + slot.as_ref().map(|elem| elem.into()) + } + + fn try_write(&self, index: usize, elem: SyntaxElement) { + // safety: children are pre-allocated and indices are determined internally + let _write = unsafe { self.data().child_locks.get_unchecked(index).write() }; + // safety: we are the only writer and there are no readers as evidenced by the write lock + let slot = unsafe { &mut *self.data().children.get_unchecked(index).get() }; + if slot.is_none() { + // we are first to initialize the child + *slot = Some(elem); + } else { + // another thread got the write lock first and already initialized it + match elem { + SyntaxElement::Node(node) => { + // There are three things to handle here: + // 1) `node` was just created, which allocated `NodeData` that we now need to + // drop, and + // 2) dropping `node` will decrement the global `ref_count`, even though the + // count was not incremented when creating `node` (because it is an + // internal reference). Thus, we need to bump the count up by one. + // 3) dropping `node`'s `NodeData` will drop its `parent` reference, which + // will again decrement the `ref_count`. Thus, we have to offset by 2 + // overall. + + // safety: `node` was just created and has not been shared + let ref_count = unsafe { Box::from_raw(node.data().ref_count) }; + ref_count.fetch_add(2, Ordering::AcqRel); + let node_data = node.data; + drop(node); + unsafe { drop(Box::from_raw(node_data)) }; + drop(ref_count); + } + SyntaxElement::Token(token) => { + // We don't have to worry about `NodeData` or `SyntaxToken`'s own `Drop` here, + // but we will still drop `token`'s `parent`, which decreases the `ref_count` + // by one. 
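+                    // Bumping the count by one (below) cancels that decrement
+                    // out, leaving the overall count unchanged.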
+ + // safety: as above + let ref_count = unsafe { &*token.parent.data().ref_count }; + ref_count.fetch_add(1, Ordering::AcqRel); + drop(token); + } + } + } + } + + #[inline(always)] + fn get_or_add_node(&self, node: &GreenNode, index: usize, offset: TextSize) -> SyntaxElementRef<'_, L, D> { + if let Some(elem) = self.read(index) { + debug_assert_eq!(elem.text_range().start(), offset); + return elem; + } + self.try_write( + index, + Self::new_child(node, self, index as u32, offset, self.data().ref_count).into(), + ); + self.read(index).unwrap() + } + + #[inline(always)] + fn get_or_add_element( + &self, + element: GreenElementRef<'_>, + index: usize, + offset: TextSize, + ) -> SyntaxElementRef<'_, L, D> { + if let Some(elem) = self.read(index) { + debug_assert_eq!(elem.text_range().start(), offset); + return elem; + } + self.try_write( + index, + SyntaxElement::new(element, self, index as u32, offset, self.data().ref_count), + ); + self.read(index).unwrap() + } + + /// Returns a green tree, equal to the green tree this node + /// belongs two, except with this node substitute. The complexity + /// of operation is proportional to the depth of the tree + pub fn replace_with(&self, replacement: GreenNode) -> GreenNode { + assert_eq!(self.syntax_kind(), replacement.kind()); + match self.data().kind.as_child() { + None => replacement, // `None` means `self` is the root + Some((parent, me, _offset)) => { + let mut replacement = Some(replacement); + let children = parent.green().children().enumerate().map(|(i, child)| { + if i as u32 == me { + replacement.take().unwrap().into() + } else { + child.cloned() + } + }); + let new_parent = GreenNode::new(parent.syntax_kind(), children); + parent.replace_with(new_parent) + } + } + } + + #[inline] + pub fn syntax_kind(&self) -> SyntaxKind { + self.green().kind() + } + + #[inline] + pub fn kind(&self) -> L::Kind { + L::kind_from_raw(self.syntax_kind()) + } + + #[inline] + pub fn text_range(&self) -> TextRange { + let offset = match self.data().kind.as_child() { + Some((_, _, it)) => it, + _ => 0.into(), + }; + TextRange::at(offset, self.green().text_len()) + } + + #[inline] + pub fn text<'n, 'i, I>(&'n self, resolver: &'i I) -> SyntaxText<'n, 'i, I, L, D> + where + I: Resolver + ?Sized, + { + SyntaxText::new(self, resolver) + } + + #[inline] + pub fn green(&self) -> &GreenNode { + unsafe { self.data().green.as_ref() } + } + + #[inline] + pub fn parent(&self) -> Option<&SyntaxNode> { + match &self.data().kind { + Kind::Root(_) => None, + Kind::Child { parent, .. 
} => Some(parent), + } + } + + #[inline] + pub fn ancestors(&self) -> impl Iterator> { + iter::successors(Some(self), |&node| node.parent()) + } + + #[inline] + pub fn children(&self) -> SyntaxNodeChildren<'_, L, D> { + SyntaxNodeChildren::new(self) + } + + #[inline] + pub fn children_with_tokens(&self) -> SyntaxElementChildren<'_, L, D> { + SyntaxElementChildren::new(self) + } + + #[inline] + pub fn first_child(&self) -> Option<&SyntaxNode> { + let (node, (index, offset)) = filter_nodes(self.green().children_from(0, self.text_range().start())).next()?; + self.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn first_child_or_token(&self) -> Option> { + let (element, (index, offset)) = self.green().children_from(0, self.text_range().start()).next()?; + Some(self.get_or_add_element(element, index, offset)) + } + + #[inline] + pub fn last_child(&self) -> Option<&SyntaxNode> { + let (node, (index, offset)) = filter_nodes( + self.green() + .children_to(self.green().children().len(), self.text_range().end()), + ) + .next()?; + self.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn last_child_or_token(&self) -> Option> { + let (element, (index, offset)) = self + .green() + .children_to(self.green().children().len(), self.text_range().end()) + .next()?; + Some(self.get_or_add_element(element, index, offset)) + } + + #[inline] + pub fn next_child_after(&self, n: usize, offset: TextSize) -> Option<&SyntaxNode> { + let (node, (index, offset)) = filter_nodes(self.green().children_from(n + 1, offset)).next()?; + self.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn next_child_or_token_after(&self, n: usize, offset: TextSize) -> Option> { + let (element, (index, offset)) = self.green().children_from(n + 1, offset).next()?; + Some(self.get_or_add_element(element, index, offset)) + } + + #[inline] + pub fn prev_child_before(&self, n: usize, offset: TextSize) -> Option<&SyntaxNode> { + let (node, (index, offset)) = filter_nodes(self.green().children_to(n, offset)).next()?; + self.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn prev_child_or_token_before(&self, n: usize, offset: TextSize) -> Option> { + let (element, (index, offset)) = self.green().children_to(n, offset).next()?; + Some(self.get_or_add_element(element, index, offset)) + } + + #[inline] + pub fn next_sibling(&self) -> Option<&SyntaxNode> { + let (parent, index, _) = self.data().kind.as_child()?; + + let (node, (index, offset)) = filter_nodes( + parent + .green() + .children_from((index + 1) as usize, self.text_range().end()), + ) + .next()?; + parent.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn next_sibling_or_token(&self) -> Option> { + let (parent, index, _) = self.data().kind.as_child()?; + + let (element, (index, offset)) = parent + .green() + .children_from((index + 1) as usize, self.text_range().end()) + .next()?; + Some(parent.get_or_add_element(element, index, offset)) + } + + #[inline] + pub fn prev_sibling(&self) -> Option<&SyntaxNode> { + let (parent, index, _) = self.data().kind.as_child()?; + + let (node, (index, offset)) = + filter_nodes(parent.green().children_to(index as usize, self.text_range().start())).next()?; + parent.get_or_add_node(node, index, offset).as_node().map(|node| *node) + } + + #[inline] + pub fn prev_sibling_or_token(&self) -> Option> { + let (parent, index, _) = self.data().kind.as_child()?; + 
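+        // scan the green siblings strictly before `index`, walking backwards
+        // from this node's own start offset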
+ let (element, (index, offset)) = parent + .green() + .children_to(index as usize, self.text_range().start()) + .next()?; + Some(parent.get_or_add_element(element, index, offset)) + } + + /// Return the leftmost token in the subtree of this node + #[inline] + pub fn first_token(&self) -> Option<&SyntaxToken> { + self.first_child_or_token()?.first_token() + } + + /// Return the rightmost token in the subtree of this node + #[inline] + pub fn last_token(&self) -> Option<&SyntaxToken> { + self.last_child_or_token()?.last_token() + } + + #[inline] + pub fn siblings(&self, direction: Direction) -> impl Iterator> { + iter::successors(Some(self), move |node| match direction { + Direction::Next => node.next_sibling(), + Direction::Prev => node.prev_sibling(), + }) + } + + #[inline] + pub fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator> { + let me: SyntaxElementRef<'_, L, D> = self.into(); + iter::successors(Some(me), move |el| match direction { + Direction::Next => el.next_sibling_or_token(), + Direction::Prev => el.prev_sibling_or_token(), + }) + } + + #[inline] + pub fn descendants(&self) -> impl Iterator> { + self.preorder().filter_map(|event| match event { + WalkEvent::Enter(node) => Some(node), + WalkEvent::Leave(_) => None, + }) + } + + #[inline] + pub fn descendants_with_tokens(&self) -> impl Iterator> { + self.preorder_with_tokens().filter_map(|event| match event { + WalkEvent::Enter(it) => Some(it), + WalkEvent::Leave(_) => None, + }) + } + + /// Traverse the subtree rooted at the current node (including the current + /// node) in preorder, excluding tokens. + #[inline] + pub fn preorder(&self) -> impl Iterator>> { + iter::successors(Some(WalkEvent::Enter(self)), move |pos| { + let next = match pos { + WalkEvent::Enter(node) => match node.first_child() { + Some(child) => WalkEvent::Enter(child), + None => WalkEvent::Leave(*node), + }, + WalkEvent::Leave(node) => { + if node == &self { + return None; + } + match node.next_sibling() { + Some(sibling) => WalkEvent::Enter(sibling), + None => WalkEvent::Leave(node.parent().unwrap()), + } + } + }; + Some(next) + }) + } + + /// Traverse the subtree rooted at the current node (including the current + /// node) in preorder, including tokens. + #[inline] + pub fn preorder_with_tokens(&self) -> impl Iterator>> { + let me = self.into(); + iter::successors(Some(WalkEvent::Enter(me)), move |pos| { + let next = match pos { + WalkEvent::Enter(el) => match el { + NodeOrToken::Node(node) => match node.first_child_or_token() { + Some(child) => WalkEvent::Enter(child), + None => WalkEvent::Leave((*node).into()), + }, + NodeOrToken::Token(token) => WalkEvent::Leave((*token).into()), + }, + WalkEvent::Leave(el) => { + if el == &me { + return None; + } + match el.next_sibling_or_token() { + Some(sibling) => WalkEvent::Enter(sibling), + None => WalkEvent::Leave(el.parent().unwrap().into()), + } + } + }; + Some(next) + }) + } + + /// Find a token in the subtree corresponding to this node, which covers the offset. + /// Precondition: offset must be withing node's range. + pub fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset> { + // TODO: this could be faster if we first drill-down to node, and only + // then switch to token search. We should also replace explicit + // recursion with a loop. 
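+        // Sketch of the semantics: for a node covering "ab cd" with tokens
+        // `ab`, ` `, `cd`, offset 1 lies strictly inside `ab` and yields
+        // `TokenAtOffset::Single`, while offset 2 touches both the end of `ab`
+        // and the start of ` ` and yields `TokenAtOffset::Between`.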
+ let range = self.text_range(); + assert!( + range.start() <= offset && offset <= range.end(), + "Bad offset: range {:?} offset {:?}", + range, + offset + ); + if range.is_empty() { + return TokenAtOffset::None; + } + + let mut children = self.children_with_tokens().filter(|child| { + let child_range = child.text_range(); + !child_range.is_empty() && (child_range.start() <= offset && offset <= child_range.end()) + }); + + let left = children.next().unwrap(); + let right = children.next(); + assert!(children.next().is_none()); + + if let Some(right) = right { + match (left.token_at_offset(offset), right.token_at_offset(offset)) { + (TokenAtOffset::Single(left), TokenAtOffset::Single(right)) => TokenAtOffset::Between(left, right), + _ => unreachable!(), + } + } else { + left.token_at_offset(offset) + } + } + + /// Return the deepest node or token in the current subtree that fully + /// contains the range. If the range is empty and is contained in two leaf + /// nodes, either one can be returned. Precondition: range must be contained + /// withing the current node + pub fn covering_element(&self, range: TextRange) -> SyntaxElementRef<'_, L, D> { + let mut res: SyntaxElementRef<'_, L, D> = self.into(); + loop { + assert!( + res.text_range().contains_range(range), + "Bad range: node range {:?}, range {:?}", + res.text_range(), + range, + ); + res = match &res { + NodeOrToken::Token(_) => return res, + NodeOrToken::Node(node) => { + match node + .children_with_tokens() + .find(|child| child.text_range().contains_range(range)) + { + Some(child) => child, + None => return res, + } + } + }; + } + } +} + +impl SyntaxToken { + fn new(parent: &SyntaxNode, index: u32, offset: TextSize) -> SyntaxToken { + Self { + parent: parent.clone_uncounted(), + index, + offset, + } + } + + /// Returns a green tree, equal to the green tree this token + /// belongs two, except with this token substitute. 
The complexity + /// of operation is proportional to the depth of the tree + pub fn replace_with(&self, replacement: GreenToken) -> GreenNode { + assert_eq!(self.syntax_kind(), replacement.kind()); + let mut replacement = Some(replacement); + let parent = self.parent(); + let me = self.index; + + let children = parent.green().children().enumerate().map(|(i, child)| { + if i as u32 == me { + replacement.take().unwrap().into() + } else { + child.cloned() + } + }); + let new_parent = GreenNode::new(parent.syntax_kind(), children); + parent.replace_with(new_parent) + } + + #[inline] + pub fn syntax_kind(&self) -> SyntaxKind { + self.green().kind() + } + + #[inline] + pub fn kind(&self) -> L::Kind { + L::kind_from_raw(self.syntax_kind()) + } + + #[inline] + pub fn text_range(&self) -> TextRange { + TextRange::at(self.offset, self.green().text_len()) + } + + #[inline] + pub fn text<'i, I>(&self, resolver: &'i I) -> &'i str + where + I: Resolver + ?Sized, + { + self.green().text(resolver) + } + + pub fn green(&self) -> &GreenToken { + self.parent + .green() + .children() + .nth(self.index as usize) + .unwrap() + .as_token() + .unwrap() + } + + #[inline] + pub fn parent(&self) -> &SyntaxNode { + &self.parent + } + + #[inline] + pub fn ancestors(&self) -> impl Iterator> { + self.parent().ancestors() + } + + #[inline] + pub fn next_sibling_or_token(&self) -> Option> { + self.parent() + .next_child_or_token_after(self.index as usize, self.text_range().end()) + } + + #[inline] + pub fn prev_sibling_or_token(&self) -> Option> { + self.parent() + .prev_child_or_token_before(self.index as usize, self.text_range().start()) + } + + #[inline] + pub fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator> { + let me: SyntaxElementRef<'_, L, D> = self.into(); + iter::successors(Some(me), move |el| match direction { + Direction::Next => el.next_sibling_or_token(), + Direction::Prev => el.prev_sibling_or_token(), + }) + } + + /// Next token in the tree (i.e, not necessary a sibling) + pub fn next_token(&self) -> Option<&SyntaxToken> { + match self.next_sibling_or_token() { + Some(element) => element.first_token(), + None => self + .parent() + .ancestors() + .find_map(|it| it.next_sibling_or_token()) + .and_then(|element| element.first_token()), + } + } + + /// Previous token in the tree (i.e, not necessary a sibling) + pub fn prev_token(&self) -> Option<&SyntaxToken> { + match self.prev_sibling_or_token() { + Some(element) => element.last_token(), + None => self + .parent() + .ancestors() + .find_map(|it| it.prev_sibling_or_token()) + .and_then(|element| element.last_token()), + } + } +} + +impl SyntaxElement { + fn new( + element: GreenElementRef<'_>, + parent: &SyntaxNode, + index: u32, + offset: TextSize, + ref_count: *mut AtomicU32, + ) -> SyntaxElement { + match element { + NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index as u32, offset, ref_count).into(), + NodeOrToken::Token(_) => SyntaxToken::new(parent, index as u32, offset).into(), + } + } + + #[inline] + pub fn text_range(&self) -> TextRange { + match self { + NodeOrToken::Node(it) => it.text_range(), + NodeOrToken::Token(it) => it.text_range(), + } + } + + #[inline] + pub fn syntax_kind(&self) -> SyntaxKind { + match self { + NodeOrToken::Node(it) => it.syntax_kind(), + NodeOrToken::Token(it) => it.syntax_kind(), + } + } + + #[inline] + pub fn kind(&self) -> L::Kind { + match self { + NodeOrToken::Node(it) => it.kind(), + NodeOrToken::Token(it) => it.kind(), + } + } + + #[inline] + pub fn parent(&self) -> 
Option<&SyntaxNode> { + match self { + NodeOrToken::Node(it) => it.parent(), + NodeOrToken::Token(it) => Some(it.parent()), + } + } + + #[inline] + pub fn ancestors(&self) -> impl Iterator> { + match self { + NodeOrToken::Node(it) => it.ancestors(), + NodeOrToken::Token(it) => it.parent().ancestors(), + } + } + + #[inline] + pub fn first_token(&self) -> Option<&SyntaxToken> { + match self { + NodeOrToken::Node(it) => it.first_token(), + NodeOrToken::Token(it) => Some(it), + } + } + + #[inline] + pub fn last_token(&self) -> Option<&SyntaxToken> { + match self { + NodeOrToken::Node(it) => it.last_token(), + NodeOrToken::Token(it) => Some(it), + } + } + + #[inline] + pub fn next_sibling_or_token(&self) -> Option> { + match self { + NodeOrToken::Node(it) => it.next_sibling_or_token(), + NodeOrToken::Token(it) => it.next_sibling_or_token(), + } + } + + #[inline] + pub fn prev_sibling_or_token(&self) -> Option> { + match self { + NodeOrToken::Node(it) => it.prev_sibling_or_token(), + NodeOrToken::Token(it) => it.prev_sibling_or_token(), + } + } +} + +impl<'a, L: Language, D> SyntaxElementRef<'a, L, D> { + #[inline] + pub fn text_range(&self) -> TextRange { + match self { + NodeOrToken::Node(it) => it.text_range(), + NodeOrToken::Token(it) => it.text_range(), + } + } + + #[inline] + pub fn syntax_kind(&self) -> SyntaxKind { + match self { + NodeOrToken::Node(it) => it.syntax_kind(), + NodeOrToken::Token(it) => it.syntax_kind(), + } + } + + #[inline] + pub fn kind(&self) -> L::Kind { + match self { + NodeOrToken::Node(it) => it.kind(), + NodeOrToken::Token(it) => it.kind(), + } + } + + #[inline] + pub fn parent(&self) -> Option<&'a SyntaxNode> { + match self { + NodeOrToken::Node(it) => it.parent(), + NodeOrToken::Token(it) => Some(it.parent()), + } + } + + #[inline] + pub fn ancestors(&self) -> impl Iterator> { + match self { + NodeOrToken::Node(it) => it.ancestors(), + NodeOrToken::Token(it) => it.parent().ancestors(), + } + } + + #[inline] + pub fn first_token(&self) -> Option<&'a SyntaxToken> { + match self { + NodeOrToken::Node(it) => it.first_token(), + NodeOrToken::Token(it) => Some(it), + } + } + + #[inline] + pub fn last_token(&self) -> Option<&'a SyntaxToken> { + match self { + NodeOrToken::Node(it) => it.last_token(), + NodeOrToken::Token(it) => Some(it), + } + } + + #[inline] + pub fn next_sibling_or_token(&self) -> Option> { + match self { + NodeOrToken::Node(it) => it.next_sibling_or_token(), + NodeOrToken::Token(it) => it.next_sibling_or_token(), + } + } + + #[inline] + pub fn prev_sibling_or_token(&self) -> Option> { + match self { + NodeOrToken::Node(it) => it.prev_sibling_or_token(), + NodeOrToken::Token(it) => it.prev_sibling_or_token(), + } + } + + #[inline] + fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset> { + assert!(self.text_range().start() <= offset && offset <= self.text_range().end()); + match self { + NodeOrToken::Token(token) => TokenAtOffset::Single((*token).clone()), + NodeOrToken::Node(node) => node.token_at_offset(offset), + } + } +} + +#[derive(Clone, Debug)] +struct Iter<'n> { + green: Children<'n>, + offset: TextSize, + index: usize, +} + +impl<'n> Iter<'n> { + fn new(parent: &'n SyntaxNode) -> Self { + let offset = parent.text_range().start(); + let green: Children<'_> = parent.green().children(); + Iter { + green, + offset, + index: 0, + } + } + + #[inline(always)] + fn next(&mut self) -> Option<(GreenElementRef, usize, TextSize)> { + self.green.next().map(|element| { + let offset = self.offset; + let index = self.index; + self.offset += 
element.text_len(); + self.index += 1; + (element, index, offset) + }) + } +} + +#[derive(Clone)] +pub struct SyntaxNodeChildren<'n, L: Language, D: 'static = ()> { + inner: Iter<'n>, + parent: &'n SyntaxNode, +} + +impl<'n, L: Language, D> SyntaxNodeChildren<'n, L, D> { + #[inline] + fn new(parent: &'n SyntaxNode) -> Self { + Self { + inner: Iter::new(parent), + parent, + } + } +} + +impl<'n, L: Language, D> Iterator for SyntaxNodeChildren<'n, L, D> { + type Item = &'n SyntaxNode; + + #[inline(always)] + fn next(&mut self) -> Option { + while let Some((element, index, offset)) = self.inner.next() { + if let Some(&node) = element.as_node() { + return Some(self.parent.get_or_add_node(node, index, offset).as_node().unwrap()); + } + } + None + } +} + +#[derive(Clone)] +pub struct SyntaxElementChildren<'n, L: Language, D: 'static = ()> { + inner: Iter<'n>, + parent: &'n SyntaxNode, +} + +impl<'n, L: Language, D> SyntaxElementChildren<'n, L, D> { + #[inline] + fn new(parent: &'n SyntaxNode) -> Self { + Self { + inner: Iter::new(parent), + parent, + } + } +} + +impl<'n, L: Language, D> Iterator for SyntaxElementChildren<'n, L, D> { + type Item = SyntaxElementRef<'n, L, D>; + + #[inline(always)] + fn next(&mut self) -> Option { + let parent = self.parent; + self.inner + .next() + .map(|(green, index, offset)| parent.get_or_add_element(green, index, offset)) + } +} + +impl GreenNode { + #[inline(always)] + fn children_from( + &self, + start_index: usize, + mut offset: TextSize, + ) -> impl Iterator { + self.children() + .skip(start_index) + .enumerate() + .map(move |(index, element)| { + let element_offset = offset; + offset += element.text_len(); + (element, (start_index + index, element_offset)) + }) + } + + #[inline(always)] + fn children_to( + &self, + end_index: usize, + mut offset: TextSize, + ) -> impl Iterator { + self.children() + .take(end_index) + .rev() + .enumerate() + .map(move |(index, element)| { + offset -= element.text_len(); + (element, (end_index - index - 1, offset)) + }) + } +} + +#[inline(always)] +fn filter_nodes<'a, I: Iterator, T)>, T>( + iter: I, +) -> impl Iterator { + iter.filter_map(|(element, data)| match element { + NodeOrToken::Node(it) => Some((it, data)), + NodeOrToken::Token(_) => None, + }) +} diff --git a/src/syntax_text.rs b/src/syntax_text.rs new file mode 100644 index 0000000..ea2b98a --- /dev/null +++ b/src/syntax_text.rs @@ -0,0 +1,357 @@ +use std::fmt; + +use crate::{interning::Resolver, Language, SyntaxNode, SyntaxToken, TextRange, TextSize}; + +#[derive(Clone)] +pub struct SyntaxText<'n, 'i, I: ?Sized, L: Language, D: 'static = ()> { + node: &'n SyntaxNode, + range: TextRange, + resolver: &'i I, +} + +impl<'n, 'i, I: Resolver + ?Sized, L: Language, D> SyntaxText<'n, 'i, I, L, D> { + pub(crate) fn new(node: &'n SyntaxNode, resolver: &'i I) -> Self { + let range = node.text_range(); + SyntaxText { node, range, resolver } + } + + pub fn len(&self) -> TextSize { + self.range.len() + } + + pub fn is_empty(&self) -> bool { + self.range.is_empty() + } + + pub fn contains_char(&self, c: char) -> bool { + self.try_for_each_chunk(|chunk| if chunk.contains(c) { Err(()) } else { Ok(()) }) + .is_err() + } + + pub fn find_char(&self, c: char) -> Option { + let mut acc: TextSize = 0.into(); + let res = self.try_for_each_chunk(|chunk| { + if let Some(pos) = chunk.find(c) { + let pos: TextSize = (pos as u32).into(); + return Err(acc + pos); + } + acc += TextSize::of(chunk); + Ok(()) + }); + found(res) + } + + pub fn char_at(&self, offset: TextSize) -> Option { + let 
offset = offset.into(); + let mut start: TextSize = 0.into(); + let res = self.try_for_each_chunk(|chunk| { + let end = start + TextSize::of(chunk); + if start <= offset && offset < end { + let off: usize = u32::from(offset - start) as usize; + return Err(chunk[off..].chars().next().unwrap()); + } + start = end; + Ok(()) + }); + found(res) + } + + pub fn slice(&self, range: R) -> Self { + let start = range.start().unwrap_or_default(); + let end = range.end().unwrap_or(self.len()); + assert!(start <= end); + let len = end - start; + let start = self.range.start() + start; + let end = start + len; + assert!( + start <= end, + "invalid slice, range: {:?}, slice: {:?}", + self.range, + (range.start(), range.end()), + ); + let range = TextRange::new(start, end); + assert!( + self.range.contains_range(range), + "invalid slice, range: {:?}, slice: {:?}", + self.range, + range, + ); + SyntaxText { + node: self.node, + range, + resolver: self.resolver, + } + } + + pub fn try_fold_chunks(&self, init: T, mut f: F) -> Result + where + F: FnMut(T, &str) -> Result, + { + self.tokens_with_ranges().try_fold(init, move |acc, (token, range)| { + f(acc, &token.text(self.resolver)[range]) + }) + } + + pub fn try_for_each_chunk Result<(), E>, E>(&self, mut f: F) -> Result<(), E> { + self.try_fold_chunks((), move |(), chunk| f(chunk)) + } + + pub fn for_each_chunk(&self, mut f: F) { + enum Void {} + match self.try_for_each_chunk(|chunk| Ok::<(), Void>(f(chunk))) { + Ok(()) => (), + Err(void) => match void {}, + } + } + + fn tokens_with_ranges(&self) -> impl Iterator, TextRange)> { + let text_range = self.range; + self.node + .descendants_with_tokens() + .filter_map(|element| element.into_token()) + .filter_map(move |token| { + let token_range = token.text_range(); + let range = text_range.intersect(token_range)?; + Some((token, range - token_range.start())) + }) + } +} + +fn found(res: Result<(), T>) -> Option { + match res { + Ok(()) => None, + Err(it) => Some(it), + } +} + +impl fmt::Debug for SyntaxText<'_, '_, I, L, D> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.to_string(), f) + } +} + +impl fmt::Display for SyntaxText<'_, '_, I, L, D> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.try_for_each_chunk(|chunk| fmt::Display::fmt(chunk, f)) + } +} + +impl From> for String { + fn from(text: SyntaxText<'_, '_, I, L, D>) -> String { + text.to_string() + } +} + +impl PartialEq for SyntaxText<'_, '_, I, L, D> { + fn eq(&self, mut rhs: &str) -> bool { + self.try_for_each_chunk(|chunk| { + if !rhs.starts_with(chunk) { + return Err(()); + } + rhs = &rhs[chunk.len()..]; + Ok(()) + }) + .is_ok() + && rhs.is_empty() + } +} + +impl PartialEq> for str { + fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool { + rhs == self + } +} + +impl PartialEq<&'_ str> for SyntaxText<'_, '_, I, L, D> { + fn eq(&self, rhs: &&str) -> bool { + self == *rhs + } +} + +impl PartialEq> for &'_ str { + fn eq(&self, rhs: &SyntaxText<'_, '_, I, L, D>) -> bool { + rhs == self + } +} + +impl<'n1, 'i1, 'n2, 'i2, I1, I2, D1, D2, L1, L2> PartialEq> + for SyntaxText<'n1, 'i1, I1, L1, D1> +where + L1: Language, + L2: Language, + I1: Resolver + ?Sized, + I2: Resolver + ?Sized, +{ + fn eq(&self, other: &SyntaxText<'_, '_, I2, L2, D2>) -> bool { + if self.range.len() != other.range.len() { + return false; + } + let mut lhs = self.tokens_with_ranges(); + let mut rhs = other.tokens_with_ranges(); + zip_texts(&mut lhs, &mut rhs, self.resolver, other.resolver).is_none() + && lhs.all(|it| 
it.1.is_empty()) + && rhs.all(|it| it.1.is_empty()) + } +} + +fn zip_texts<'it1, 'it2, It1, It2, I1, I2, L1, L2, D1, D2>( + xs: &mut It1, + ys: &mut It2, + resolver_x: &I1, + resolver_y: &I2, +) -> Option<()> +where + It1: Iterator, TextRange)>, + It2: Iterator, TextRange)>, + I1: Resolver + ?Sized, + I2: Resolver + ?Sized, + D1: 'static, + D2: 'static, + L1: Language + 'it1, + L2: Language + 'it2, +{ + let mut x = xs.next()?; + let mut y = ys.next()?; + loop { + while x.1.is_empty() { + x = xs.next()?; + } + while y.1.is_empty() { + y = ys.next()?; + } + let x_text = &x.0.text(resolver_x)[x.1]; + let y_text = &y.0.text(resolver_y)[y.1]; + if !(x_text.starts_with(y_text) || y_text.starts_with(x_text)) { + return Some(()); + } + let advance = std::cmp::min(x.1.len(), y.1.len()); + x.1 = TextRange::new(x.1.start() + advance, x.1.end()); + y.1 = TextRange::new(y.1.start() + advance, y.1.end()); + } +} + +impl Eq for SyntaxText<'_, '_, I, L, D> {} + +mod private { + use std::ops; + + use crate::{TextRange, TextSize}; + + pub trait SyntaxTextRange { + fn start(&self) -> Option; + fn end(&self) -> Option; + } + + impl SyntaxTextRange for TextRange { + fn start(&self) -> Option { + Some(TextRange::start(*self)) + } + + fn end(&self) -> Option { + Some(TextRange::end(*self)) + } + } + + impl SyntaxTextRange for ops::Range { + fn start(&self) -> Option { + Some(self.start) + } + + fn end(&self) -> Option { + Some(self.end) + } + } + + impl SyntaxTextRange for ops::RangeFrom { + fn start(&self) -> Option { + Some(self.start) + } + + fn end(&self) -> Option { + None + } + } + + impl SyntaxTextRange for ops::RangeTo { + fn start(&self) -> Option { + None + } + + fn end(&self) -> Option { + Some(self.end) + } + } + + impl SyntaxTextRange for ops::RangeFull { + fn start(&self) -> Option { + None + } + + fn end(&self) -> Option { + None + } + } +} + +#[cfg(test)] +mod tests { + use crate::{green::SyntaxKind, GreenNodeBuilder}; + + use super::*; + + #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum TestLang {} + impl Language for TestLang { + type Kind = SyntaxKind; + + fn kind_from_raw(raw: SyntaxKind) -> Self::Kind { + raw + } + + fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { + kind + } + } + + fn build_tree(chunks: &[&str]) -> (SyntaxNode, impl Resolver) { + let mut builder = GreenNodeBuilder::new(); + builder.start_node(SyntaxKind(62)); + for &chunk in chunks.iter() { + builder.token(SyntaxKind(92), chunk.into()) + } + builder.finish_node(); + let (node, interner) = builder.finish(); + (SyntaxNode::new_root(node), interner.unwrap()) + } + + #[test] + fn test_text_equality() { + fn do_check(t1: &[&str], t2: &[&str]) { + let (t1, resolver) = build_tree(t1); + let t1 = t1.text(&resolver); + let (t2, resolver) = build_tree(t2); + let t2 = t2.text(&resolver); + let expected = t1.to_string() == t2.to_string(); + let actual = t1 == t2; + assert_eq!(expected, actual, "`{}` (SyntaxText) `{}` (SyntaxText)", t1, t2); + let actual = t1 == &*t2.to_string(); + assert_eq!(expected, actual, "`{}` (SyntaxText) `{}` (&str)", t1, t2); + } + fn check(t1: &[&str], t2: &[&str]) { + do_check(t1, t2); + do_check(t2, t1) + } + + check(&[""], &[""]); + check(&["a"], &[""]); + check(&["a"], &["a"]); + check(&["abc"], &["def"]); + check(&["hello", "world"], &["hello", "world"]); + check(&["hellowo", "rld"], &["hell", "oworld"]); + check(&["hel", "lowo", "rld"], &["helloworld"]); + check(&["{", "abc", "}"], &["{", "123", "}"]); + check(&["{", "abc", "}", "{"], &["{", "123", "}"]); + check(&["{", 
"abc", "}"], &["{", "123", "}", "{"]); + check(&["{", "abc", "}ab"], &["{", "abc", "}", "ab"]); + } +} diff --git a/src/utility_types.rs b/src/utility_types.rs new file mode 100644 index 0000000..c907ce0 --- /dev/null +++ b/src/utility_types.rs @@ -0,0 +1,142 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum NodeOrToken { + Node(N), + Token(T), +} + +impl NodeOrToken { + pub fn into_node(self) -> Option { + match self { + NodeOrToken::Node(node) => Some(node), + NodeOrToken::Token(_) => None, + } + } + + pub fn into_token(self) -> Option { + match self { + NodeOrToken::Node(_) => None, + NodeOrToken::Token(token) => Some(token), + } + } + + pub fn as_node(&self) -> Option<&N> { + match self { + NodeOrToken::Node(node) => Some(node), + NodeOrToken::Token(_) => None, + } + } + + pub fn as_token(&self) -> Option<&T> { + match self { + NodeOrToken::Node(_) => None, + NodeOrToken::Token(token) => Some(token), + } + } + + pub(crate) fn as_ref(&self) -> NodeOrToken<&N, &T> { + match self { + NodeOrToken::Node(node) => NodeOrToken::Node(node), + NodeOrToken::Token(token) => NodeOrToken::Token(token), + } + } +} + +impl NodeOrToken<&N, &T> { + pub(crate) fn cloned(&self) -> NodeOrToken { + match *self { + NodeOrToken::Node(node) => NodeOrToken::Node(node.clone()), + NodeOrToken::Token(token) => NodeOrToken::Token(token.clone()), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Direction { + Next, + Prev, +} + +/// `WalkEvent` describes tree walking process. +#[derive(Debug, Copy, Clone)] +pub enum WalkEvent { + /// Fired before traversing the node. + Enter(T), + /// Fired after the node is traversed. + Leave(T), +} + +impl WalkEvent { + pub fn map U, U>(self, f: F) -> WalkEvent { + match self { + WalkEvent::Enter(it) => WalkEvent::Enter(f(it)), + WalkEvent::Leave(it) => WalkEvent::Leave(f(it)), + } + } +} + +/// There might be zero, one or two leaves at a given offset. +#[derive(Clone, Debug)] +pub enum TokenAtOffset { + /// No leaves at offset -- possible for the empty file. + None, + /// Only a single leaf at offset. + Single(T), + /// Offset is exactly between two leaves. + Between(T, T), +} + +impl TokenAtOffset { + pub fn map U, U>(self, f: F) -> TokenAtOffset { + match self { + TokenAtOffset::None => TokenAtOffset::None, + TokenAtOffset::Single(it) => TokenAtOffset::Single(f(it)), + TokenAtOffset::Between(l, r) => TokenAtOffset::Between(f(l), f(r)), + } + } + + /// Convert to option, preferring the right leaf in case of a tie. + pub fn right_biased(self) -> Option { + match self { + TokenAtOffset::None => None, + TokenAtOffset::Single(node) => Some(node), + TokenAtOffset::Between(_, right) => Some(right), + } + } + + /// Convert to option, preferring the left leaf in case of a tie. 
+ pub fn left_biased(self) -> Option { + match self { + TokenAtOffset::None => None, + TokenAtOffset::Single(node) => Some(node), + TokenAtOffset::Between(left, _) => Some(left), + } + } +} + +impl Iterator for TokenAtOffset { + type Item = T; + + fn next(&mut self) -> Option { + match std::mem::replace(self, TokenAtOffset::None) { + TokenAtOffset::None => None, + TokenAtOffset::Single(node) => { + *self = TokenAtOffset::None; + Some(node) + } + TokenAtOffset::Between(left, right) => { + *self = TokenAtOffset::Single(right); + Some(left) + } + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + TokenAtOffset::None => (0, Some(0)), + TokenAtOffset::Single(_) => (1, Some(1)), + TokenAtOffset::Between(_, _) => (2, Some(2)), + } + } +} + +impl ExactSizeIterator for TokenAtOffset {} diff --git a/tests/basic.rs b/tests/basic.rs new file mode 100644 index 0000000..382092d --- /dev/null +++ b/tests/basic.rs @@ -0,0 +1,100 @@ +mod common; + +use common::TestLang; +use cstree::{GreenNodeBuilder, SyntaxKind, SyntaxNode, TextRange}; +use lasso::Resolver; + +#[derive(Debug)] +enum Element<'s> { + Node(Vec>), + Token(&'s str), +} + +fn two_level_tree() -> Element<'static> { + use Element::*; + Node(vec![ + Node(vec![Token("0.0"), Token("0.1")]), + Node(vec![Token("1.0")]), + Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]), + ]) +} + +fn build_tree(root: &Element<'_>) -> (SyntaxNode, impl Resolver) { + let mut builder = GreenNodeBuilder::new(); + build_recursive(root, &mut builder, 0); + let (node, interner) = builder.finish(); + (SyntaxNode::new_root(node), interner.unwrap()) +} + +fn build_recursive(root: &Element<'_>, builder: &mut GreenNodeBuilder, mut from: u16) -> u16 { + match root { + Element::Node(children) => { + builder.start_node(SyntaxKind(from)); + for child in children { + from = build_recursive(child, builder, from + 1); + } + builder.finish_node(); + } + Element::Token(text) => { + builder.token(SyntaxKind(from), *text); + } + } + from +} + +#[test] +fn create() { + let tree = two_level_tree(); + let (tree, resolver) = build_tree::<()>(&tree); + assert_eq!(tree.syntax_kind(), SyntaxKind(0)); + assert_eq!(tree.kind(), SyntaxKind(0)); + { + let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().nth(0).unwrap(); + let leaf1_0 = leaf1_0.into_token().unwrap(); + assert_eq!(leaf1_0.syntax_kind(), SyntaxKind(5)); + assert_eq!(leaf1_0.kind(), SyntaxKind(5)); + assert_eq!(leaf1_0.text(&resolver), "1.0"); + assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into())); + } + { + let node2 = tree.children().nth(2).unwrap(); + assert_eq!(node2.syntax_kind(), SyntaxKind(6)); + assert_eq!(node2.kind(), SyntaxKind(6)); + assert_eq!(node2.children_with_tokens().count(), 3); + assert_eq!(node2.text(&resolver), "2.02.12.2"); + } +} + +#[test] +fn data() { + let tree = two_level_tree(); + let (tree, _resolver) = build_tree::(&tree); + { + let node2 = tree.children().nth(2).unwrap(); + assert_eq!(*node2.try_set_data("data".into()).unwrap(), "data"); + let data = node2.get_data().unwrap(); + assert_eq!(data.as_str(), "data"); + node2.set_data("payload".into()); + let data = node2.get_data().unwrap(); + assert_eq!(data.as_str(), "payload"); + } + { + let node2 = tree.children().nth(2).unwrap(); + assert!(node2.try_set_data("already present".into()).is_err()); + let data = node2.get_data().unwrap(); + assert_eq!(data.as_str(), "payload"); + node2.set_data("new data".into()); + } + { + let node2 = tree.children().nth(2).unwrap(); + let data = node2.get_data().unwrap(); + 
assert_eq!(data.as_str(), "new data"); + node2.clear_data(); + // re-use `data` after node data was cleared + assert_eq!(data.as_str(), "new data"); + } + { + let node2 = tree.children().nth(2).unwrap(); + assert_eq!(node2.get_data(), None); + } +} diff --git a/tests/common.rs b/tests/common.rs new file mode 100644 index 0000000..70492fc --- /dev/null +++ b/tests/common.rs @@ -0,0 +1,15 @@ +use cstree::{Language, SyntaxKind}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum TestLang {} +impl Language for TestLang { + type Kind = SyntaxKind; + + fn kind_from_raw(raw: SyntaxKind) -> Self::Kind { + raw + } + + fn kind_to_raw(kind: Self::Kind) -> SyntaxKind { + kind + } +} diff --git a/vendor/servo_arc/Cargo.toml b/vendor/servo_arc/Cargo.toml new file mode 100644 index 0000000..419858a --- /dev/null +++ b/vendor/servo_arc/Cargo.toml @@ -0,0 +1,35 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g. crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "servo_arc" +version = "0.1.1" +authors = ["The Servo Project Developers"] +description = "A fork of std::sync::Arc with some extra functionality and without weak references" +license = "MIT/Apache-2.0" +repository = "https://github.com/servo/servo" + +[lib] +name = "servo_arc" +path = "lib.rs" +[dependencies.nodrop] +version = "0.1.8" + +[dependencies.serde] +version = "1.0" +optional = true + +[dependencies.stable_deref_trait] +version = "1.0.0" + +[features] +servo = ["serde"] diff --git a/vendor/servo_arc/lib.rs b/vendor/servo_arc/lib.rs new file mode 100644 index 0000000..48942d3 --- /dev/null +++ b/vendor/servo_arc/lib.rs @@ -0,0 +1,1007 @@ +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Fork of Arc for Servo. This has the following advantages over std::sync::Arc: +//! +//! * We don't waste storage on the weak reference count. +//! * We don't do extra RMU operations to handle the possibility of weak references. +//! * We can experiment with arena allocation (todo). +//! * We can add methods to support our custom use cases [1]. +//! * We have support for dynamically-sized types (see from_header_and_iter). +//! * We have support for thin arcs to unsized types (see ThinArc). +//! +//! [1]: https://bugzilla.mozilla.org/show_bug.cgi?id=1360883 + +// The semantics of Arc are alread documented in the Rust docs, so we don't +// duplicate those here. 
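+//
+// A rough size sketch of the saving (64-bit target; assumes the usual `std`
+// layout of two `AtomicUsize` counters followed by the payload):
+//
+//   std::sync::Arc<u64> heap block:  strong + weak + data = 24 bytes
+//   servo_arc::Arc<u64> heap block:  count + data         = 16 bytes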
+#![allow(missing_docs)]
+
+extern crate nodrop;
+#[cfg(feature = "servo")] extern crate serde;
+extern crate stable_deref_trait;
+
+use nodrop::NoDrop;
+#[cfg(feature = "servo")]
+use serde::{Deserialize, Serialize};
+use stable_deref_trait::{CloneStableDeref, StableDeref};
+use std::{alloc::Layout, isize, mem::align_of_val, usize};
+use std::borrow;
+use std::cmp::Ordering;
+use std::convert::From;
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::iter::{ExactSizeIterator, Iterator};
+use std::mem;
+use std::ops::{Deref, DerefMut};
+use std::os::raw::c_void;
+use std::process;
+use std::ptr;
+use std::slice;
+use std::sync::atomic;
+use std::sync::atomic::Ordering::{Acquire, Relaxed, Release};
+
+/// Get the offset within an `ArcInner` for
+/// a payload of type described by a pointer.
+///
+/// # Safety
+///
+/// This has the same safety requirements as `align_of_val_raw`. In effect:
+///
+/// - This function is safe for any argument if `T` is sized, and
+/// - if `T` is unsized, the pointer must have appropriate pointer metadata
+///   acquired from the real instance that you are getting this offset for.
+unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
+    // Align the unsized value to the end of the `ArcInner`.
+    // Because it is `?Sized`, it will always be the last field in memory.
+    // Note: This is a detail of the current implementation of the compiler,
+    // and is not a guaranteed language detail. Do not rely on it outside of std.
+    data_offset_align(align_of_val(&*ptr))
+}
+
+#[inline]
+fn data_offset_align(align: usize) -> isize {
+    let layout = Layout::new::<ArcInner<()>>();
+    (layout.size() + padding_needed_for(&layout, align)) as isize
+}
+
+#[inline]
+fn padding_needed_for(layout: &Layout, align: usize) -> usize {
+    let len = layout.size();
+    let len_rounded_up = len.wrapping_add(align).wrapping_sub(1) & !align.wrapping_sub(1);
+    len_rounded_up.wrapping_sub(len)
+}
+
+/// A soft limit on the amount of references that may be made to an `Arc`.
+///
+/// Going above this limit will abort your program (although not
+/// necessarily) at _exactly_ `MAX_REFCOUNT + 1` references.
+const MAX_REFCOUNT: usize = (isize::MAX) as usize;
+
+/// Wrapper type for pointers to get the non-zero optimization. When
+/// NonZero/Shared/Unique are stabilized, we should just use Shared
+/// here to get the same effect. Gankro is working on this in [1].
+///
+/// It's unfortunate that this needs to infect all the caller types
+/// with 'static. It would be nice to just use a &() and a PhantomData<T>
+/// instead, but then the compiler can't determine whether the &() should
+/// be thin or fat (which depends on whether or not T is sized). Given
+/// that this is all a temporary hack, this restriction is fine for now.
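+///
+/// Editor's illustration (an assumption, not upstream text): the "non-zero
+/// optimization" means the compiler may use the null bit pattern for `None`,
+/// because the wrapped `&'static mut T` can never be null, so wrapping in
+/// `Option` costs nothing:
+///
+/// ```ignore
+/// assert_eq!(size_of::<NonZeroPtrMut<u8>>(), size_of::<Option<NonZeroPtrMut<u8>>>());
+/// ```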
+///
+/// [1]: https://github.com/rust-lang/rust/issues/27730
+// FIXME: remove this and use std::ptr::NonNull when Firefox requires Rust 1.25+
+pub struct NonZeroPtrMut<T: ?Sized + 'static>(&'static mut T);
+impl<T: ?Sized> NonZeroPtrMut<T> {
+    pub fn new(ptr: *mut T) -> Self {
+        assert!(!(ptr as *mut u8).is_null());
+        NonZeroPtrMut(unsafe { mem::transmute(ptr) })
+    }
+
+    pub fn ptr(&self) -> *mut T {
+        self.0 as *const T as *mut T
+    }
+}
+
+impl<T: ?Sized> Clone for NonZeroPtrMut<T> {
+    fn clone(&self) -> Self {
+        NonZeroPtrMut::new(self.ptr())
+    }
+}
+
+impl<T: ?Sized> fmt::Pointer for NonZeroPtrMut<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Pointer::fmt(&self.ptr(), f)
+    }
+}
+
+impl<T: ?Sized> fmt::Debug for NonZeroPtrMut<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <Self as fmt::Pointer>::fmt(self, f)
+    }
+}
+
+impl<T: ?Sized> PartialEq for NonZeroPtrMut<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.ptr() == other.ptr()
+    }
+}
+
+impl<T: ?Sized> Eq for NonZeroPtrMut<T> {}
+
+impl<T: ?Sized> Hash for NonZeroPtrMut<T> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.ptr().hash(state)
+    }
+}
+
+#[repr(C)]
+pub struct Arc<T: ?Sized + 'static> {
+    p: NonZeroPtrMut<ArcInner<T>>,
+}
+
+/// An Arc that is known to be uniquely owned
+///
+/// This lets us build arcs that we can mutate before
+/// freezing, without needing to change the allocation
+pub struct UniqueArc<T: ?Sized>(Arc<T>);
+
+impl<T> UniqueArc<T> {
+    #[inline]
+    /// Construct a new UniqueArc
+    pub fn new(data: T) -> Self {
+        UniqueArc(Arc::new(data))
+    }
+
+    #[inline]
+    /// Convert to a shareable Arc<T> once we're done using it
+    pub fn shareable(self) -> Arc<T> {
+        self.0
+    }
+}
+
+impl<T> Deref for UniqueArc<T> {
+    type Target = T;
+    fn deref(&self) -> &T {
+        &*self.0
+    }
+}
+
+impl<T> DerefMut for UniqueArc<T> {
+    fn deref_mut(&mut self) -> &mut T {
+        // We know this to be uniquely owned
+        unsafe { &mut (*self.0.ptr()).data }
+    }
+}
+
+unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {}
+unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
+
+#[repr(C)]
+struct ArcInner<T: ?Sized> {
+    count: atomic::AtomicUsize,
+    data: T,
+}
+
+unsafe impl<T: ?Sized + Sync + Send> Send for ArcInner<T> {}
+unsafe impl<T: ?Sized + Sync + Send> Sync for ArcInner<T> {}
+
+impl<T> Arc<T> {
+    #[inline]
+    pub fn new(data: T) -> Self {
+        let x = Box::new(ArcInner {
+            count: atomic::AtomicUsize::new(1),
+            data: data,
+        });
+        Arc { p: NonZeroPtrMut::new(Box::into_raw(x)) }
+    }
+
+    #[inline]
+    pub fn into_raw(this: Self) -> *const T {
+        let ptr = unsafe { &((*this.ptr()).data) as *const _ };
+        mem::forget(this);
+        ptr
+    }
+
+    #[inline]
+    pub unsafe fn from_raw(ptr: *const T) -> Self {
+        // To find the corresponding pointer to the `ArcInner` we need
+        // to subtract the offset of the `data` field from the pointer.
+        let offset = data_offset(ptr);
+        let ptr = (ptr as *const u8).offset(-offset);
+        Arc {
+            p: NonZeroPtrMut::new(ptr as *mut ArcInner<T>),
+        }
+    }
+
+    /// Produce a pointer to the data that can be converted back
+    /// to an arc
+    #[inline]
+    pub fn borrow_arc<'a>(&'a self) -> ArcBorrow<'a, T> {
+        ArcBorrow(&**self)
+    }
+
+    /// Temporarily converts |self| into a bonafide RawOffsetArc and exposes it to the
+    /// provided callback. The refcount is not modified.
+    #[inline(always)]
+    pub fn with_raw_offset_arc<F, U>(&self, f: F) -> U
+        where F: FnOnce(&RawOffsetArc<T>) -> U
+    {
+        // Synthesize transient Arc, which never touches the refcount of the ArcInner.
+        let transient = unsafe { NoDrop::new(Arc::into_raw_offset(ptr::read(self))) };
+
+        // Expose the transient Arc to the callback, which may clone it if it wants.
+        let result = f(&transient);
+
+        // Forget the transient Arc to leave the refcount untouched.
+        mem::forget(transient);
+
+        // Forward the result.
+        result
+    }
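+
+    // Editor's note (hedged): the `ptr::read` + `NoDrop` + `mem::forget` dance
+    // in `with_raw_offset_arc` works because `ptr::read` makes a bitwise copy
+    // without touching the refcount, `NoDrop` keeps that copy's destructor from
+    // running if `f` panics, and the final `mem::forget` ensures the copy's
+    // `Drop` (which would decrement the count) never executes.
+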
+    /// Returns the address on the heap of the Arc itself -- not the T within it -- for memory
+    /// reporting.
+    pub fn heap_ptr(&self) -> *const c_void {
+        self.p.ptr() as *const ArcInner<T> as *const c_void
+    }
+}
+
+impl<T: ?Sized> Arc<T> {
+    #[inline]
+    fn inner(&self) -> &ArcInner<T> {
+        // This unsafety is ok because while this arc is alive we're guaranteed
+        // that the inner pointer is valid. Furthermore, we know that the
+        // `ArcInner` structure itself is `Sync` because the inner data is
+        // `Sync` as well, so we're ok loaning out an immutable pointer to these
+        // contents.
+        unsafe { &*self.ptr() }
+    }
+
+    // Non-inlined part of `drop`. Just invokes the destructor.
+    #[inline(never)]
+    unsafe fn drop_slow(&mut self) {
+        let _ = Box::from_raw(self.ptr());
+    }
+
+    #[inline]
+    pub fn ptr_eq(this: &Self, other: &Self) -> bool {
+        this.ptr() == other.ptr()
+    }
+
+    fn ptr(&self) -> *mut ArcInner<T> {
+        self.p.ptr()
+    }
+}
+
+impl<T: ?Sized> Clone for Arc<T> {
+    #[inline]
+    fn clone(&self) -> Self {
+        // Using a relaxed ordering is alright here, as knowledge of the
+        // original reference prevents other threads from erroneously deleting
+        // the object.
+        //
+        // As explained in the [Boost documentation][1], increasing the
+        // reference counter can always be done with memory_order_relaxed: New
+        // references to an object can only be formed from an existing
+        // reference, and passing an existing reference from one thread to
+        // another must already provide any required synchronization.
+        //
+        // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
+        let old_size = self.inner().count.fetch_add(1, Relaxed);
+
+        // However we need to guard against massive refcounts in case someone
+        // is `mem::forget`ing Arcs. If we don't do this the count can overflow
+        // and users will use-after-free. We racily saturate to `isize::MAX` on
+        // the assumption that there aren't ~2 billion threads incrementing
+        // the reference count at once. This branch will never be taken in
+        // any realistic program.
+        //
+        // We abort because such a program is incredibly degenerate, and we
+        // don't care to support it.
+        if old_size > MAX_REFCOUNT {
+            process::abort();
+        }
+
+        Arc { p: NonZeroPtrMut::new(self.ptr()) }
+    }
+}
+
+impl<T: ?Sized> Deref for Arc<T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &T {
+        &self.inner().data
+    }
+}
+
+impl<T: Clone> Arc<T> {
+    #[inline]
+    pub fn make_mut(this: &mut Self) -> &mut T {
+        if !this.is_unique() {
+            // Another pointer exists; clone
+            *this = Arc::new((**this).clone());
+        }
+
+        unsafe {
+            // This unsafety is ok because we're guaranteed that the pointer
+            // returned is the *only* pointer that will ever be returned to T. Our
+            // reference count is guaranteed to be 1 at this point, and we required
+            // the Arc itself to be `mut`, so we're returning the only possible
+            // reference to the inner data.
+            &mut (*this.ptr()).data
+        }
+    }
+}
+
+impl<T: ?Sized> Arc<T> {
+    #[inline]
+    pub fn get_mut(this: &mut Self) -> Option<&mut T> {
+        if this.is_unique() {
+            unsafe {
+                // See make_mut() for documentation of the threadsafety here.
+                Some(&mut (*this.ptr()).data)
+            }
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    pub fn is_unique(&self) -> bool {
+        // We can use Relaxed here, but the justification is a bit subtle.
+        //
+        // The reason to use Acquire would be to synchronize with other threads
+        // that are modifying the refcount with Release, i.e. to ensure that
+        // their writes to memory guarded by this refcount are flushed. However,
+        // we know that threads only modify the contents of the Arc when they
+        // observe the refcount to be 1, and no other thread could observe that
+        // because we're holding one strong reference here.
+        self.inner().count.load(Relaxed) == 1
+    }
+}
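+
+// Editor's note (hedged): `is_unique` and `get_mut` together form the mutation
+// protocol: the `Relaxed` load is sound only because observing a count of 1
+// while holding a reference proves no other thread can be racing on this
+// allocation. Cross-thread ordering is instead established in `Drop` below,
+// where `Release` on the decrement pairs with an `Acquire` load before deletion.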
+
+impl<T: ?Sized> Drop for Arc<T> {
+    #[inline]
+    fn drop(&mut self) {
+        // Because `fetch_sub` is already atomic, we do not need to synchronize
+        // with other threads unless we are going to delete the object.
+        if self.inner().count.fetch_sub(1, Release) != 1 {
+            return;
+        }
+
+        // FIXME(bholley): Use the updated comment when [2] is merged.
+        //
+        // This load is needed to prevent reordering of use of the data and
+        // deletion of the data. Because it is marked `Release`, the decreasing
+        // of the reference count synchronizes with this `Acquire` load. This
+        // means that use of the data happens before decreasing the reference
+        // count, which happens before this load, which happens before the
+        // deletion of the data.
+        //
+        // As explained in the [Boost documentation][1],
+        //
+        // > It is important to enforce any possible access to the object in one
+        // > thread (through an existing reference) to *happen before* deleting
+        // > the object in a different thread. This is achieved by a "release"
+        // > operation after dropping a reference (any access to the object
+        // > through this reference must obviously happened before), and an
+        // > "acquire" operation before deleting the object.
+        //
+        // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html)
+        // [2]: https://github.com/rust-lang/rust/pull/41714
+        self.inner().count.load(Acquire);
+
+        unsafe {
+            self.drop_slow();
+        }
+    }
+}
+
+impl<T: ?Sized + PartialEq> PartialEq for Arc<T> {
+    fn eq(&self, other: &Arc<T>) -> bool {
+        Self::ptr_eq(self, other) || *(*self) == *(*other)
+    }
+
+    fn ne(&self, other: &Arc<T>) -> bool {
+        !Self::ptr_eq(self, other) && *(*self) != *(*other)
+    }
+}
+impl<T: ?Sized + PartialOrd> PartialOrd for Arc<T> {
+    fn partial_cmp(&self, other: &Arc<T>) -> Option<Ordering> {
+        (**self).partial_cmp(&**other)
+    }
+
+    fn lt(&self, other: &Arc<T>) -> bool {
+        *(*self) < *(*other)
+    }
+
+    fn le(&self, other: &Arc<T>) -> bool {
+        *(*self) <= *(*other)
+    }
+
+    fn gt(&self, other: &Arc<T>) -> bool {
+        *(*self) > *(*other)
+    }
+
+    fn ge(&self, other: &Arc<T>) -> bool {
+        *(*self) >= *(*other)
+    }
+}
+impl<T: ?Sized + Ord> Ord for Arc<T> {
+    fn cmp(&self, other: &Arc<T>) -> Ordering {
+        (**self).cmp(&**other)
+    }
+}
+impl<T: ?Sized + Eq> Eq for Arc<T> {}
+
+impl<T: ?Sized + fmt::Display> fmt::Display for Arc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(&**self, f)
+    }
+}
+
+impl<T: ?Sized + fmt::Debug> fmt::Debug for Arc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Debug::fmt(&**self, f)
+    }
+}
+
+impl<T: ?Sized> fmt::Pointer for Arc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Pointer::fmt(&self.ptr(), f)
+    }
+}
+
+impl<T: Default> Default for Arc<T> {
+    fn default() -> Arc<T> {
+        Arc::new(Default::default())
+    }
+}
+
+impl<T: ?Sized + Hash> Hash for Arc<T> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        (**self).hash(state)
+    }
+}
+
+impl<T> From<T> for Arc<T> {
+    #[inline]
+    fn from(t: T) -> Self {
+        Arc::new(t)
+    }
+}
+
+impl<T: ?Sized> borrow::Borrow<T> for Arc<T> {
+    #[inline]
+    fn borrow(&self) -> &T {
+        &**self
+    }
+}
+
+impl<T: ?Sized> AsRef<T> for Arc<T> {
+    #[inline]
+    fn as_ref(&self) -> &T {
+        &**self
+    }
+}
+
+unsafe impl<T: ?Sized> StableDeref for Arc<T> {}
+unsafe impl<T: ?Sized> CloneStableDeref for Arc<T> {}
+
+#[cfg(feature = "servo")]
+impl<'de, T: Deserialize<'de>> Deserialize<'de> for Arc<T>
+{
+    fn deserialize<D>(deserializer: D) -> Result<Arc<T>, D::Error>
+    where
+        D: ::serde::de::Deserializer<'de>,
+    {
+        T::deserialize(deserializer).map(Arc::new)
+    }
+}
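+
+// Editor's note (hedged): serde support is transparent -- an `Arc<T>`
+// round-trips as a plain `T` (deserialization builds a fresh allocation via
+// `Arc::new` above), so two clones of one `Arc` serialize as two independent
+// copies and sharing is not reconstructed on the way back in.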
"servo")] +impl Serialize for Arc +{ + fn serialize(&self, serializer: S) -> Result + where + S: ::serde::ser::Serializer, + { + (**self).serialize(serializer) + } +} + +/// Structure to allow Arc-managing some fixed-sized data and a variably-sized +/// slice in a single allocation. +#[derive(Debug, Eq, PartialEq, PartialOrd)] +pub struct HeaderSlice { + /// The fixed-sized data. + pub header: H, + + /// The dynamically-sized data. + pub slice: T, +} + +#[inline(always)] +fn divide_rounding_up(dividend: usize, divisor: usize) -> usize { + (dividend + divisor - 1) / divisor +} + +impl Arc> { + /// Creates an Arc for a HeaderSlice using the given header struct and + /// iterator to generate the slice. The resulting Arc will be fat. + #[inline] + pub fn from_header_and_iter(header: H, mut items: I) -> Self + where I: Iterator + ExactSizeIterator + { + use ::std::mem::size_of; + assert_ne!(size_of::(), 0, "Need to think about ZST"); + + // Compute the required size for the allocation. + let num_items = items.len(); + let size = { + // First, determine the alignment of a hypothetical pointer to a + // HeaderSlice. + let fake_slice_ptr_align: usize = mem::align_of::>>(); + + // Next, synthesize a totally garbage (but properly aligned) pointer + // to a sequence of T. + let fake_slice_ptr = fake_slice_ptr_align as *const T; + + // Convert that sequence to a fat pointer. The address component of + // the fat pointer will be garbage, but the length will be correct. + let fake_slice = unsafe { slice::from_raw_parts(fake_slice_ptr, num_items) }; + + // Pretend the garbage address points to our allocation target (with + // a trailing sequence of T), rather than just a sequence of T. + let fake_ptr = fake_slice as *const [T] as *const ArcInner>; + let fake_ref: &ArcInner> = unsafe { &*fake_ptr }; + + // Use size_of_val, which will combine static information about the + // type with the length from the fat pointer. The garbage address + // will not be used. + mem::size_of_val(fake_ref) + }; + + let ptr: *mut ArcInner>; + unsafe { + // Allocate the buffer. We use Vec because the underlying allocation + // machinery isn't available in stable Rust. + // + // To avoid alignment issues, we allocate words rather than bytes, + // rounding up to the nearest word size. + let buffer = if mem::align_of::() <= mem::align_of::() { + Self::allocate_buffer::(size) + } else if mem::align_of::() <= mem::align_of::() { + // On 32-bit platforms may have 8 byte alignment while usize has 4 byte aligment. + // Use u64 to avoid over-alignment. + // This branch will compile away in optimized builds. + Self::allocate_buffer::(size) + } else { + panic!("Over-aligned type not handled"); + }; + + // Synthesize the fat pointer. We do this by claiming we have a direct + // pointer to a [T], and then changing the type of the borrow. The key + // point here is that the length portion of the fat pointer applies + // only to the number of elements in the dynamically-sized portion of + // the type, so the value will be the same whether it points to a [T] + // or something else with a [T] as its last member. + let fake_slice: &mut [T] = slice::from_raw_parts_mut(buffer as *mut T, num_items); + ptr = fake_slice as *mut [T] as *mut ArcInner>; + + // Write the data. + // + // Note that any panics here (i.e. from the iterator) are safe, since + // we'll just leak the uninitialized memory. 
+ ptr::write(&mut ((*ptr).count), atomic::AtomicUsize::new(1)); + ptr::write(&mut ((*ptr).data.header), header); + if let Some(current) = (*ptr).data.slice.get_mut(0) { + let mut current: *mut T = current; + for _ in 0..num_items { + ptr::write(current, items.next().expect("ExactSizeIterator over-reported length")); + current = current.offset(1); + } + assert!(items.next().is_none(), "ExactSizeIterator under-reported length"); + + // We should have consumed the buffer exactly. + debug_assert_eq!(current as *mut u8, buffer.offset(size as isize)); + } + } + + // Return the fat Arc. + assert_eq!(size_of::(), size_of::() * 2, "The Arc will be fat"); + Arc { p: NonZeroPtrMut::new(ptr) } + } + + #[inline] + unsafe fn allocate_buffer(size: usize) -> *mut u8 { + let words_to_allocate = divide_rounding_up(size, mem::size_of::()); + let mut vec = Vec::::with_capacity(words_to_allocate); + vec.set_len(words_to_allocate); + Box::into_raw(vec.into_boxed_slice()) as *mut W as *mut u8 + } +} + +/// Header data with an inline length. Consumers that use HeaderWithLength as the +/// Header type in HeaderSlice can take advantage of ThinArc. +#[derive(Debug, Eq, PartialEq, PartialOrd)] +pub struct HeaderWithLength { + /// The fixed-sized data. + pub header: H, + + /// The slice length. + length: usize, +} + +impl HeaderWithLength { + /// Creates a new HeaderWithLength. + pub fn new(header: H, length: usize) -> Self { + HeaderWithLength { + header: header, + length: length, + } + } +} + +type HeaderSliceWithLength = HeaderSlice, T>; +pub struct ThinArc { + ptr: *mut ArcInner>, +} + +unsafe impl Send for ThinArc {} +unsafe impl Sync for ThinArc {} + +// Synthesize a fat pointer from a thin pointer. +// +// See the comment around the analogous operation in from_header_and_iter. +fn thin_to_thick(thin: *mut ArcInner>) + -> *mut ArcInner> +{ + let len = unsafe { (*thin).data.header.length }; + let fake_slice: *mut [T] = unsafe { + slice::from_raw_parts_mut(thin as *mut T, len) + }; + + fake_slice as *mut ArcInner> +} + +impl ThinArc { + /// Temporarily converts |self| into a bonafide Arc and exposes it to the + /// provided callback. The refcount is not modified. + #[inline] + pub fn with_arc(&self, f: F) -> U + where F: FnOnce(&Arc>) -> U + { + // Synthesize transient Arc, which never touches the refcount of the ArcInner. + let transient = NoDrop::new(Arc { + p: NonZeroPtrMut::new(thin_to_thick(self.ptr)) + }); + + // Expose the transient Arc to the callback, which may clone it if it wants. + let result = f(&transient); + + // Forget the transient Arc to leave the refcount untouched. + // XXXManishearth this can be removed when unions stabilize, + // since then NoDrop becomes zero overhead + mem::forget(transient); + + // Forward the result. + result + } + + /// Returns the address on the heap of the ThinArc itself -- not the T + /// within it -- for memory reporting. + #[inline] + pub fn heap_ptr(&self) -> *const c_void { + self.ptr as *const ArcInner as *const c_void + } +} + +impl Deref for ThinArc { + type Target = HeaderSliceWithLength; + + #[inline] + fn deref(&self) -> &Self::Target { + unsafe { &(*thin_to_thick(self.ptr)).data } + } +} + +impl Clone for ThinArc { + #[inline] + fn clone(&self) -> Self { + ThinArc::with_arc(self, |a| Arc::into_thin(a.clone())) + } +} + +impl Drop for ThinArc { + #[inline] + fn drop(&mut self) { + let _ = Arc::from_thin(ThinArc { ptr: self.ptr }); + } +} + +impl Arc> { + /// Converts an Arc into a ThinArc. This consumes the Arc, so the refcount + /// is not modified. 
+
+impl<H: 'static, T: 'static> Arc<HeaderSliceWithLength<H, [T]>> {
+    /// Converts an Arc into a ThinArc. This consumes the Arc, so the refcount
+    /// is not modified.
+    #[inline]
+    pub fn into_thin(a: Self) -> ThinArc<H, T> {
+        assert_eq!(a.header.length, a.slice.len(),
+                   "Length needs to be correct for ThinArc to work");
+        let fat_ptr: *mut ArcInner<HeaderSliceWithLength<H, [T]>> = a.ptr();
+        mem::forget(a);
+        let thin_ptr = fat_ptr as *mut [usize] as *mut usize;
+        ThinArc {
+            ptr: thin_ptr as *mut ArcInner<HeaderSliceWithLength<H, [T; 1]>>
+        }
+    }
+
+    /// Converts a ThinArc into an Arc. This consumes the ThinArc, so the refcount
+    /// is not modified.
+    #[inline]
+    pub fn from_thin(a: ThinArc<H, T>) -> Self {
+        let ptr = thin_to_thick(a.ptr);
+        mem::forget(a);
+        Arc {
+            p: NonZeroPtrMut::new(ptr)
+        }
+    }
+}
+
+impl<H: PartialEq + 'static, T: PartialEq + 'static> PartialEq for ThinArc<H, T> {
+    #[inline]
+    fn eq(&self, other: &ThinArc<H, T>) -> bool {
+        ThinArc::with_arc(self, |a| {
+            ThinArc::with_arc(other, |b| {
+                *a == *b
+            })
+        })
+    }
+}
+
+impl<H: Eq + 'static, T: Eq + 'static> Eq for ThinArc<H, T> {}
+
+/// An Arc, except it holds a pointer to the T instead of to the
+/// entire ArcInner.
+///
+/// ```text
+///  Arc<T>    RawOffsetArc<T>
+///   |          |
+///   v          v
+///  ---------------------
+/// | RefCount | T (data) | [ArcInner<T>]
+///  ---------------------
+/// ```
+///
+/// This means that this is a direct pointer to
+/// its contained data (and can be read from by both C++ and Rust),
+/// but we can also convert it to a "regular" Arc<T> by removing the offset
+#[derive(Eq)]
+#[repr(C)]
+pub struct RawOffsetArc<T: 'static> {
+    ptr: NonZeroPtrMut<T>,
+}
+
+unsafe impl<T: 'static + Sync + Send> Send for RawOffsetArc<T> {}
+unsafe impl<T: 'static + Sync + Send> Sync for RawOffsetArc<T> {}
+
+impl<T: 'static> Deref for RawOffsetArc<T> {
+    type Target = T;
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.ptr.ptr() }
+    }
+}
+
+impl<T: 'static + Clone> Clone for RawOffsetArc<T> {
+    #[inline]
+    fn clone(&self) -> Self {
+        Arc::into_raw_offset(self.clone_arc())
+    }
+}
+
+impl<T: 'static> Drop for RawOffsetArc<T> {
+    fn drop(&mut self) {
+        let _ = Arc::from_raw_offset(RawOffsetArc { ptr: self.ptr.clone() });
+    }
+}
+
+impl<T: fmt::Debug + 'static> fmt::Debug for RawOffsetArc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Debug::fmt(&**self, f)
+    }
+}
+
+impl<T: PartialEq> PartialEq for RawOffsetArc<T> {
+    fn eq(&self, other: &RawOffsetArc<T>) -> bool {
+        *(*self) == *(*other)
+    }
+
+    fn ne(&self, other: &RawOffsetArc<T>) -> bool {
+        *(*self) != *(*other)
+    }
+}
+
+impl<T: 'static> RawOffsetArc<T> {
+    /// Temporarily converts |self| into a bonafide Arc and exposes it to the
+    /// provided callback. The refcount is not modified.
+    #[inline]
+    pub fn with_arc<F, U>(&self, f: F) -> U
+        where F: FnOnce(&Arc<T>) -> U
+    {
+        // Synthesize transient Arc, which never touches the refcount of the ArcInner.
+        let transient = unsafe { NoDrop::new(Arc::from_raw(self.ptr.ptr())) };
+
+        // Expose the transient Arc to the callback, which may clone it if it wants.
+        let result = f(&transient);
+
+        // Forget the transient Arc to leave the refcount untouched.
+        // XXXManishearth this can be removed when unions stabilize,
+        // since then NoDrop becomes zero overhead
+        mem::forget(transient);
+
+        // Forward the result.
+        result
+    }
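+
+    // Editor's note (hedged): this is the FFI-oriented access pattern -- C++
+    // hands over the raw data pointer, and `with_arc` temporarily dresses it up
+    // as a real `Arc<T>` (via `from_raw`, which subtracts the data offset)
+    // without ever touching the refcount.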
+
+    /// If uniquely owned, provide a mutable reference
+    /// Else create a copy, and mutate that
+    #[inline]
+    pub fn make_mut(&mut self) -> &mut T where T: Clone {
+        unsafe {
+            // extract the RawOffsetArc as an owned variable
+            let this = ptr::read(self);
+            // treat it as a real Arc
+            let mut arc = Arc::from_raw_offset(this);
+            // obtain the mutable reference. Cast away the lifetime
+            // This may mutate `arc`
+            let ret = Arc::make_mut(&mut arc) as *mut _;
+            // Store the possibly-mutated arc back inside, after converting
+            // it to a RawOffsetArc again
+            ptr::write(self, Arc::into_raw_offset(arc));
+            &mut *ret
+        }
+    }
+
+    /// Clone it as an Arc
+    #[inline]
+    pub fn clone_arc(&self) -> Arc<T> {
+        RawOffsetArc::with_arc(self, |a| a.clone())
+    }
+
+    /// Produce a pointer to the data that can be converted back
+    /// to an arc
+    #[inline]
+    pub fn borrow_arc<'a>(&'a self) -> ArcBorrow<'a, T> {
+        ArcBorrow(&**self)
+    }
+}
+
+impl<T: 'static> Arc<T> {
+    /// Converts an Arc into a RawOffsetArc. This consumes the Arc, so the refcount
+    /// is not modified.
+    #[inline]
+    pub fn into_raw_offset(a: Self) -> RawOffsetArc<T> {
+        RawOffsetArc {
+            ptr: NonZeroPtrMut::new(Arc::into_raw(a) as *mut T),
+        }
+    }
+
+    /// Converts a RawOffsetArc into an Arc. This consumes the RawOffsetArc, so the refcount
+    /// is not modified.
+    #[inline]
+    pub fn from_raw_offset(a: RawOffsetArc<T>) -> Self {
+        let ptr = a.ptr.ptr();
+        mem::forget(a);
+        unsafe { Arc::from_raw(ptr) }
+    }
+}
+
+/// A "borrowed Arc". This is a pointer to
+/// a T that is known to have been allocated within an
+/// Arc.
+///
+/// This is equivalent in guarantees to `&Arc<T>`, however it is
+/// a bit more flexible. To obtain an `&Arc<T>` you must have
+/// an Arc<T> instance somewhere pinned down until we're done with it.
+///
+/// However, Gecko hands us refcounted things as pointers to T directly,
+/// so we have to conjure up a temporary Arc on the stack each time. The
+/// same happens for when the object is managed by a RawOffsetArc.
+///
+/// ArcBorrow lets us deal with borrows of known-refcounted objects
+/// without needing to worry about how they're actually stored.
+#[derive(Eq, PartialEq)]
+pub struct ArcBorrow<'a, T: 'a>(&'a T);
+
+impl<'a, T> Copy for ArcBorrow<'a, T> {}
+impl<'a, T> Clone for ArcBorrow<'a, T> {
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'a, T> ArcBorrow<'a, T> {
+    #[inline]
+    pub fn clone_arc(&self) -> Arc<T> {
+        let arc = unsafe { Arc::from_raw(self.0) };
+        // addref it!
+        mem::forget(arc.clone());
+        arc
+    }
+
+    /// For constructing from a reference known to be Arc-backed,
+    /// e.g. if we obtain such a reference over FFI
+    #[inline]
+    pub unsafe fn from_ref(r: &'a T) -> Self {
+        ArcBorrow(r)
+    }
+
+    #[inline]
+    pub fn with_arc<F, U>(&self, f: F) -> U where F: FnOnce(&Arc<T>) -> U, T: 'static {
+        // Synthesize transient Arc, which never touches the refcount.
+        let transient = unsafe { NoDrop::new(Arc::from_raw(self.0)) };
+
+        // Expose the transient Arc to the callback, which may clone it if it wants.
+        let result = f(&transient);
+
+        // Forget the transient Arc to leave the refcount untouched.
+        // XXXManishearth this can be removed when unions stabilize,
+        // since then NoDrop becomes zero overhead
+        mem::forget(transient);
+
+        // Forward the result.
+        result
+    }
+}
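+
+// Editor's note (hedged): `clone_arc` above is the classic "addref" idiom --
+// conjure an `Arc` from the borrowed pointer, `clone` it to bump the count by
+// one, then `forget` the clone so that the net effect is exactly one new
+// reference, owned by the returned `Arc`.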
+
+impl<'a, T> Deref for ArcBorrow<'a, T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &T {
+        &*self.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::clone::Clone;
+    use std::ops::Drop;
+    use std::sync::atomic;
+    use std::sync::atomic::Ordering::{Acquire, SeqCst};
+    use super::{Arc, HeaderWithLength, ThinArc};
+
+    #[derive(PartialEq)]
+    struct Canary(*mut atomic::AtomicUsize);
+
+    impl Drop for Canary {
+        fn drop(&mut self) {
+            unsafe { (*self.0).fetch_add(1, SeqCst); }
+        }
+    }
+
+    #[test]
+    fn slices_and_thin() {
+        let mut canary = atomic::AtomicUsize::new(0);
+        let c = Canary(&mut canary as *mut atomic::AtomicUsize);
+        let v = vec![5, 6];
+        let header = HeaderWithLength::new(c, v.len());
+        {
+            let x = Arc::into_thin(Arc::from_header_and_iter(header, v.into_iter()));
+            let y = ThinArc::with_arc(&x, |q| q.clone());
+            let _ = y.clone();
+            let _ = x == x;
+            Arc::from_thin(x.clone());
+        }
+        assert_eq!(canary.load(Acquire), 1);
+    }
+}
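+
+// Editor's usage sketch (hypothetical, not part of the original commit): the
+// `Canary` pattern above generalizes -- it counts drops, so the final assertion
+// proves the header is destroyed exactly once no matter how many thin and fat
+// handles were created along the way:
+//
+//     let x = Arc::from_header_and_iter(header, v.into_iter()); // count = 1
+//     let thin = Arc::into_thin(x);                             // still 1
+//     let fat  = Arc::from_thin(thin.clone());                  // now 2
+//     drop(fat); drop(thin);                                    // data dropped once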