diff options
-rw-r--r-- | src/lexer/mod.rs | 2 | ||||
-rw-r--r-- | src/lib.rs | 20 | ||||
-rw-r--r-- | src/parser/mod.rs | 1 | ||||
-rw-r--r-- | src/text.rs | 11 | ||||
-rw-r--r-- | src/tree/file_builder.rs | 5 | ||||
-rw-r--r-- | src/tree/mod.rs | 24 |
6 files changed, 60 insertions, 3 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 2f8d3a402..65a994327 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -17,6 +17,7 @@ use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char | |||
17 | mod comments; | 17 | mod comments; |
18 | use self::comments::{scan_comment, scan_shebang}; | 18 | use self::comments::{scan_comment, scan_shebang}; |
19 | 19 | ||
20 | /// Break a string up into its component tokens | ||
20 | pub fn tokenize(text: &str) -> Vec<Token> { | 21 | pub fn tokenize(text: &str) -> Vec<Token> { |
21 | let mut text = text; | 22 | let mut text = text; |
22 | let mut acc = Vec::new(); | 23 | let mut acc = Vec::new(); |
@@ -28,6 +29,7 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
28 | } | 29 | } |
29 | acc | 30 | acc |
30 | } | 31 | } |
32 | /// Get the next token from a string | ||
31 | pub fn next_token(text: &str) -> Token { | 33 | pub fn next_token(text: &str) -> Token { |
32 | assert!(!text.is_empty()); | 34 | assert!(!text.is_empty()); |
33 | let mut ptr = Ptr::new(text); | 35 | let mut ptr = Ptr::new(text); |
diff --git a/src/lib.rs b/src/lib.rs index 39b01a1cb..87a9d11ea 100644 --- a/src/lib.rs +++ b/src/lib.rs | |||
@@ -1,3 +1,20 @@ | |||
1 | //! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. | ||
2 | //! | ||
3 | //! The intent is to be an IDE-ready parser, i.e. one that offers | ||
4 | //! | ||
5 | //! - easy and fast incremental re-parsing, | ||
6 | //! - graceful handling of errors, and | ||
7 | //! - preservation of all information in the source file. | ||
8 | //! | ||
9 | //! For more information, see [the RFC][rfc#2256], or [the working draft][RFC.md]. | ||
10 | //! | ||
11 | //! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256> | ||
12 | //! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md> | ||
13 | |||
14 | #![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)] | ||
15 | #![deny(bad_style, unsafe_code, missing_docs)] | ||
16 | //#![warn(unreachable_pub)] // rust-lang/rust#47816 | ||
17 | |||
1 | extern crate unicode_xid; | 18 | extern crate unicode_xid; |
2 | 19 | ||
3 | mod text; | 20 | mod text; |
@@ -6,17 +23,20 @@ mod lexer; | |||
6 | mod parser; | 23 | mod parser; |
7 | 24 | ||
8 | #[cfg_attr(rustfmt, rustfmt_skip)] | 25 | #[cfg_attr(rustfmt, rustfmt_skip)] |
26 | #[allow(missing_docs)] | ||
9 | pub mod syntax_kinds; | 27 | pub mod syntax_kinds; |
10 | pub use text::{TextRange, TextUnit}; | 28 | pub use text::{TextRange, TextUnit}; |
11 | pub use tree::{File, FileBuilder, Node, Sink, SyntaxKind, Token}; | 29 | pub use tree::{File, FileBuilder, Node, Sink, SyntaxKind, Token}; |
12 | pub use lexer::{next_token, tokenize}; | 30 | pub use lexer::{next_token, tokenize}; |
13 | pub use parser::parse; | 31 | pub use parser::parse; |
14 | 32 | ||
33 | /// Utilities for simple uses of the parser. | ||
15 | pub mod utils { | 34 | pub mod utils { |
16 | use std::fmt::Write; | 35 | use std::fmt::Write; |
17 | 36 | ||
18 | use {File, Node}; | 37 | use {File, Node}; |
19 | 38 | ||
39 | /// Create a string representation of the parse tree of an already-parsed file. | ||
20 | pub fn dump_tree(file: &File) -> String { | 40 | pub fn dump_tree(file: &File) -> String { |
21 | let mut result = String::new(); | 41 | let mut result = String::new(); |
22 | go(file.root(), &mut result, 0); | 42 | go(file.root(), &mut result, 0); |
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d04ed1e75..0f8f2ce0c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs | |||
@@ -6,6 +6,7 @@ use tree::TOMBSTONE; | |||
6 | mod event_parser; | 6 | mod event_parser; |
7 | use self::event_parser::Event; | 7 | use self::event_parser::Event; |
8 | 8 | ||
9 | /// Parse a sequence of tokens into the corresponding node tree | ||
9 | pub fn parse(text: String, tokens: &[Token]) -> File { | 10 | pub fn parse(text: String, tokens: &[Token]) -> File { |
10 | let events = event_parser::parse(&text, tokens); | 11 | let events = event_parser::parse(&text, tokens); |
11 | from_events_to_file(text, tokens, events) | 12 | from_events_to_file(text, tokens, events) |
diff --git a/src/text.rs b/src/text.rs index ac1a54a75..4084bf44e 100644 --- a/src/text.rs +++ b/src/text.rs | |||
@@ -1,14 +1,17 @@ | |||
1 | use std::fmt; | 1 | use std::fmt; |
2 | use std::ops; | 2 | use std::ops; |
3 | 3 | ||
4 | /// A text position in a source file | ||
4 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | 5 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
5 | pub struct TextUnit(u32); | 6 | pub struct TextUnit(u32); |
6 | 7 | ||
7 | impl TextUnit { | 8 | impl TextUnit { |
9 | /// The positional offset required for one character | ||
8 | pub fn len_of_char(c: char) -> TextUnit { | 10 | pub fn len_of_char(c: char) -> TextUnit { |
9 | TextUnit(c.len_utf8() as u32) | 11 | TextUnit(c.len_utf8() as u32) |
10 | } | 12 | } |
11 | 13 | ||
14 | #[allow(missing_docs)] | ||
12 | pub fn new(val: u32) -> TextUnit { | 15 | pub fn new(val: u32) -> TextUnit { |
13 | TextUnit(val) | 16 | TextUnit(val) |
14 | } | 17 | } |
@@ -64,6 +67,7 @@ impl ops::SubAssign<TextUnit> for TextUnit { | |||
64 | } | 67 | } |
65 | } | 68 | } |
66 | 69 | ||
70 | /// A range of text in a source file | ||
67 | #[derive(Clone, Copy, PartialEq, Eq)] | 71 | #[derive(Clone, Copy, PartialEq, Eq)] |
68 | pub struct TextRange { | 72 | pub struct TextRange { |
69 | start: TextUnit, | 73 | start: TextUnit, |
@@ -83,10 +87,12 @@ impl fmt::Display for TextRange { | |||
83 | } | 87 | } |
84 | 88 | ||
85 | impl TextRange { | 89 | impl TextRange { |
90 | /// A length-0 range of text | ||
86 | pub fn empty() -> TextRange { | 91 | pub fn empty() -> TextRange { |
87 | TextRange::from_to(TextUnit::new(0), TextUnit::new(0)) | 92 | TextRange::from_to(TextUnit::new(0), TextUnit::new(0)) |
88 | } | 93 | } |
89 | 94 | ||
95 | /// The left-inclusive range (`[from..to)`) between two points in the text | ||
90 | pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { | 96 | pub fn from_to(from: TextUnit, to: TextUnit) -> TextRange { |
91 | assert!(from <= to, "Invalid text range [{}; {})", from, to); | 97 | assert!(from <= to, "Invalid text range [{}; {})", from, to); |
92 | TextRange { | 98 | TextRange { |
@@ -95,22 +101,27 @@ impl TextRange { | |||
95 | } | 101 | } |
96 | } | 102 | } |
97 | 103 | ||
104 | /// The range from some point over some length | ||
98 | pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange { | 105 | pub fn from_len(from: TextUnit, len: TextUnit) -> TextRange { |
99 | TextRange::from_to(from, from + len) | 106 | TextRange::from_to(from, from + len) |
100 | } | 107 | } |
101 | 108 | ||
109 | /// The starting position of this range | ||
102 | pub fn start(&self) -> TextUnit { | 110 | pub fn start(&self) -> TextUnit { |
103 | self.start | 111 | self.start |
104 | } | 112 | } |
105 | 113 | ||
114 | /// The end position of this range | ||
106 | pub fn end(&self) -> TextUnit { | 115 | pub fn end(&self) -> TextUnit { |
107 | self.end | 116 | self.end |
108 | } | 117 | } |
109 | 118 | ||
119 | /// The length of this range | ||
110 | pub fn len(&self) -> TextUnit { | 120 | pub fn len(&self) -> TextUnit { |
111 | self.end - self.start | 121 | self.end - self.start |
112 | } | 122 | } |
113 | 123 | ||
124 | /// Is this range empty of any content? | ||
114 | pub fn is_empty(&self) -> bool { | 125 | pub fn is_empty(&self) -> bool { |
115 | self.start() == self.end() | 126 | self.start() == self.end() |
116 | } | 127 | } |
diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs index 939922cb2..738705f02 100644 --- a/src/tree/file_builder.rs +++ b/src/tree/file_builder.rs | |||
@@ -1,3 +1,6 @@ | |||
1 | // FIXME(CAD97): I don't understand this mod well enough to stub out docs for the public symbols yet | ||
2 | #![allow(missing_docs)] | ||
3 | |||
1 | use {SyntaxKind, TextRange, TextUnit}; | 4 | use {SyntaxKind, TextRange, TextUnit}; |
2 | use super::{File, NodeData, NodeIdx, SyntaxErrorData}; | 5 | use super::{File, NodeData, NodeIdx, SyntaxErrorData}; |
3 | 6 | ||
@@ -8,6 +11,7 @@ pub trait Sink { | |||
8 | fn error(&mut self) -> ErrorBuilder; | 11 | fn error(&mut self) -> ErrorBuilder; |
9 | } | 12 | } |
10 | 13 | ||
14 | #[derive(Debug)] | ||
11 | pub struct FileBuilder { | 15 | pub struct FileBuilder { |
12 | text: String, | 16 | text: String, |
13 | nodes: Vec<NodeData>, | 17 | nodes: Vec<NodeData>, |
@@ -139,6 +143,7 @@ fn grow(left: &mut TextRange, right: TextRange) { | |||
139 | *left = TextRange::from_to(left.start(), right.end()) | 143 | *left = TextRange::from_to(left.start(), right.end()) |
140 | } | 144 | } |
141 | 145 | ||
146 | #[derive(Debug)] | ||
142 | pub struct ErrorBuilder<'f> { | 147 | pub struct ErrorBuilder<'f> { |
143 | message: Option<String>, | 148 | message: Option<String>, |
144 | builder: &'f mut FileBuilder, | 149 | builder: &'f mut FileBuilder, |
diff --git a/src/tree/mod.rs b/src/tree/mod.rs index a330caf54..aaf048c73 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs | |||
@@ -7,6 +7,7 @@ use std::cmp; | |||
7 | mod file_builder; | 7 | mod file_builder; |
8 | pub use self::file_builder::{FileBuilder, Sink}; | 8 | pub use self::file_builder::{FileBuilder, Sink}; |
9 | 9 | ||
10 | /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. | ||
10 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | 11 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
11 | pub struct SyntaxKind(pub(crate) u32); | 12 | pub struct SyntaxKind(pub(crate) u32); |
12 | 13 | ||
@@ -37,12 +38,17 @@ pub(crate) struct SyntaxInfo { | |||
37 | pub name: &'static str, | 38 | pub name: &'static str, |
38 | } | 39 | } |
39 | 40 | ||
41 | /// A token of Rust source. | ||
40 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | 42 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
41 | pub struct Token { | 43 | pub struct Token { |
44 | /// The kind of token. | ||
42 | pub kind: SyntaxKind, | 45 | pub kind: SyntaxKind, |
46 | /// The length of the token. | ||
43 | pub len: TextUnit, | 47 | pub len: TextUnit, |
44 | } | 48 | } |
45 | 49 | ||
50 | /// The contents of a Rust source file. | ||
51 | #[derive(Debug)] | ||
46 | pub struct File { | 52 | pub struct File { |
47 | text: String, | 53 | text: String, |
48 | nodes: Vec<NodeData>, | 54 | nodes: Vec<NodeData>, |
@@ -50,6 +56,7 @@ pub struct File { | |||
50 | } | 56 | } |
51 | 57 | ||
52 | impl File { | 58 | impl File { |
59 | /// The root node of this source file. | ||
53 | pub fn root<'f>(&'f self) -> Node<'f> { | 60 | pub fn root<'f>(&'f self) -> Node<'f> { |
54 | assert!(!self.nodes.is_empty()); | 61 | assert!(!self.nodes.is_empty()); |
55 | Node { | 62 | Node { |
@@ -59,6 +66,7 @@ impl File { | |||
59 | } | 66 | } |
60 | } | 67 | } |
61 | 68 | ||
69 | /// A reference to a node in the syntax tree of a Rust source file. | ||
62 | #[derive(Clone, Copy)] | 70 | #[derive(Clone, Copy)] |
63 | pub struct Node<'f> { | 71 | pub struct Node<'f> { |
64 | file: &'f File, | 72 | file: &'f File, |
@@ -66,28 +74,34 @@ pub struct Node<'f> { | |||
66 | } | 74 | } |
67 | 75 | ||
68 | impl<'f> Node<'f> { | 76 | impl<'f> Node<'f> { |
77 | /// The syntax kind of this node. | ||
69 | pub fn kind(&self) -> SyntaxKind { | 78 | pub fn kind(&self) -> SyntaxKind { |
70 | self.data().kind | 79 | self.data().kind |
71 | } | 80 | } |
72 | 81 | ||
82 | /// The text range covered by this node. | ||
73 | pub fn range(&self) -> TextRange { | 83 | pub fn range(&self) -> TextRange { |
74 | self.data().range | 84 | self.data().range |
75 | } | 85 | } |
76 | 86 | ||
87 | /// The text at this node. | ||
77 | pub fn text(&self) -> &'f str { | 88 | pub fn text(&self) -> &'f str { |
78 | &self.file.text.as_str()[self.range()] | 89 | &self.file.text.as_str()[self.range()] |
79 | } | 90 | } |
80 | 91 | ||
92 | /// The parent node to this node. | ||
81 | pub fn parent(&self) -> Option<Node<'f>> { | 93 | pub fn parent(&self) -> Option<Node<'f>> { |
82 | self.as_node(self.data().parent) | 94 | self.as_node(self.data().parent) |
83 | } | 95 | } |
84 | 96 | ||
97 | /// The children nodes of this node. | ||
85 | pub fn children(&self) -> Children<'f> { | 98 | pub fn children(&self) -> Children<'f> { |
86 | Children { | 99 | Children { |
87 | next: self.as_node(self.data().first_child), | 100 | next: self.as_node(self.data().first_child), |
88 | } | 101 | } |
89 | } | 102 | } |
90 | 103 | ||
104 | /// Any errors contained in this node. | ||
91 | pub fn errors(&self) -> SyntaxErrors<'f> { | 105 | pub fn errors(&self) -> SyntaxErrors<'f> { |
92 | let pos = self.file.errors.iter().position(|e| e.node == self.idx); | 106 | let pos = self.file.errors.iter().position(|e| e.node == self.idx); |
93 | let next = pos.map(|i| ErrorIdx(i as u32)).map(|idx| SyntaxError { | 107 | let next = pos.map(|i| ErrorIdx(i as u32)).map(|idx| SyntaxError { |
@@ -123,7 +137,7 @@ impl<'f> cmp::PartialEq<Node<'f>> for Node<'f> { | |||
123 | 137 | ||
124 | impl<'f> cmp::Eq for Node<'f> {} | 138 | impl<'f> cmp::Eq for Node<'f> {} |
125 | 139 | ||
126 | #[derive(Clone, Copy)] | 140 | #[derive(Clone, Copy, Debug)] |
127 | pub struct SyntaxError<'f> { | 141 | pub struct SyntaxError<'f> { |
128 | file: &'f File, | 142 | file: &'f File, |
129 | idx: ErrorIdx, | 143 | idx: ErrorIdx, |
@@ -162,6 +176,7 @@ impl<'f> SyntaxError<'f> { | |||
162 | } | 176 | } |
163 | } | 177 | } |
164 | 178 | ||
179 | #[derive(Debug)] | ||
165 | pub struct Children<'f> { | 180 | pub struct Children<'f> { |
166 | next: Option<Node<'f>>, | 181 | next: Option<Node<'f>>, |
167 | } | 182 | } |
@@ -176,6 +191,7 @@ impl<'f> Iterator for Children<'f> { | |||
176 | } | 191 | } |
177 | } | 192 | } |
178 | 193 | ||
194 | #[derive(Debug)] | ||
179 | pub struct SyntaxErrors<'f> { | 195 | pub struct SyntaxErrors<'f> { |
180 | next: Option<SyntaxError<'f>>, | 196 | next: Option<SyntaxError<'f>>, |
181 | } | 197 | } |
@@ -190,9 +206,10 @@ impl<'f> Iterator for SyntaxErrors<'f> { | |||
190 | } | 206 | } |
191 | } | 207 | } |
192 | 208 | ||
193 | #[derive(Clone, Copy, PartialEq, Eq)] | 209 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
194 | struct NodeIdx(u32); | 210 | struct NodeIdx(u32); |
195 | 211 | ||
212 | #[derive(Debug)] | ||
196 | struct NodeData { | 213 | struct NodeData { |
197 | kind: SyntaxKind, | 214 | kind: SyntaxKind, |
198 | range: TextRange, | 215 | range: TextRange, |
@@ -215,9 +232,10 @@ impl ::std::ops::IndexMut<NodeIdx> for Vec<NodeData> { | |||
215 | } | 232 | } |
216 | } | 233 | } |
217 | 234 | ||
218 | #[derive(Clone, Copy)] | 235 | #[derive(Clone, Copy, Debug)] |
219 | struct ErrorIdx(u32); | 236 | struct ErrorIdx(u32); |
220 | 237 | ||
238 | #[derive(Debug)] | ||
221 | struct SyntaxErrorData { | 239 | struct SyntaxErrorData { |
222 | node: NodeIdx, | 240 | node: NodeIdx, |
223 | message: String, | 241 | message: String, |