diff options
-rw-r--r-- | src/lexer/comments.rs | 3 | ||||
-rw-r--r-- | src/lexer/mod.rs | 50 | ||||
-rw-r--r-- | src/lexer/numbers.rs | 3 | ||||
-rw-r--r-- | src/lexer/strings.rs | 3 | ||||
-rw-r--r-- | src/lib.rs | 37 | ||||
-rw-r--r-- | src/parser/event.rs | 33 | ||||
-rw-r--r-- | src/parser/grammar/mod.rs | 13 | ||||
-rw-r--r-- | src/parser/input.rs | 10 | ||||
-rw-r--r-- | src/parser/mod.rs | 26 | ||||
-rw-r--r-- | src/parser/parser/imp.rs | 3 | ||||
-rw-r--r-- | src/parser/parser/mod.rs | 3 | ||||
-rw-r--r-- | src/syntax_kinds/generated.rs (renamed from src/syntax_kinds.rs) | 70 | ||||
-rw-r--r-- | src/syntax_kinds/mod.rs | 27 | ||||
-rw-r--r-- | src/tree/mod.rs | 27 | ||||
-rw-r--r-- | src/yellow/builder.rs (renamed from src/tree/file_builder.rs) | 49 | ||||
-rw-r--r-- | src/yellow/mod.rs | 4 | ||||
-rw-r--r-- | src/yellow/syntax.rs | 10 | ||||
-rw-r--r-- | tests/parser.rs | 5 | ||||
-rw-r--r-- | tools/src/bin/gen.rs | 18 | ||||
-rw-r--r-- | tools/src/bin/parse.rs | 5 |
20 files changed, 206 insertions, 193 deletions
diff --git a/src/lexer/comments.rs b/src/lexer/comments.rs index d1e958817..01acb6515 100644 --- a/src/lexer/comments.rs +++ b/src/lexer/comments.rs | |||
@@ -1,7 +1,6 @@ | |||
1 | use lexer::ptr::Ptr; | 1 | use lexer::ptr::Ptr; |
2 | 2 | ||
3 | use SyntaxKind; | 3 | use SyntaxKind::{self, *}; |
4 | use syntax_kinds::*; | ||
5 | 4 | ||
6 | pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { | 5 | pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { |
7 | if ptr.next_is('!') && ptr.nnext_is('/') { | 6 | if ptr.next_is('!') && ptr.nnext_is('/') { |
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 65a994327..69cab5b57 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -1,21 +1,32 @@ | |||
1 | use {SyntaxKind, Token}; | ||
2 | use syntax_kinds::*; | ||
3 | |||
4 | mod ptr; | 1 | mod ptr; |
5 | use self::ptr::Ptr; | 2 | mod comments; |
6 | 3 | mod strings; | |
4 | mod numbers; | ||
7 | mod classes; | 5 | mod classes; |
8 | use self::classes::*; | ||
9 | 6 | ||
10 | mod numbers; | 7 | use { |
11 | use self::numbers::scan_number; | 8 | TextUnit, |
9 | SyntaxKind::{self, *}, | ||
10 | }; | ||
12 | 11 | ||
13 | mod strings; | 12 | use self::{ |
14 | use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, | 13 | ptr::Ptr, |
15 | scan_string}; | 14 | classes::*, |
15 | numbers::scan_number, | ||
16 | strings::{ | ||
17 | is_string_literal_start, scan_byte_char_or_string, scan_char, | ||
18 | scan_raw_string, scan_string}, | ||
19 | comments::{scan_comment, scan_shebang}, | ||
20 | }; | ||
16 | 21 | ||
17 | mod comments; | 22 | /// A token of Rust source. |
18 | use self::comments::{scan_comment, scan_shebang}; | 23 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
24 | pub struct Token { | ||
25 | /// The kind of token. | ||
26 | pub kind: SyntaxKind, | ||
27 | /// The length of the token. | ||
28 | pub len: TextUnit, | ||
29 | } | ||
19 | 30 | ||
20 | /// Break a string up into its component tokens | 31 | /// Break a string up into its component tokens |
21 | pub fn tokenize(text: &str) -> Vec<Token> { | 32 | pub fn tokenize(text: &str) -> Vec<Token> { |
@@ -29,6 +40,7 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
29 | } | 40 | } |
30 | acc | 41 | acc |
31 | } | 42 | } |
43 | |||
32 | /// Get the next token from a string | 44 | /// Get the next token from a string |
33 | pub fn next_token(text: &str) -> Token { | 45 | pub fn next_token(text: &str) -> Token { |
34 | assert!(!text.is_empty()); | 46 | assert!(!text.is_empty()); |
@@ -109,7 +121,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
109 | DOTDOT | 121 | DOTDOT |
110 | } | 122 | } |
111 | _ => DOT, | 123 | _ => DOT, |
112 | } | 124 | }; |
113 | } | 125 | } |
114 | ':' => { | 126 | ':' => { |
115 | return match ptr.next() { | 127 | return match ptr.next() { |
@@ -118,7 +130,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
118 | COLONCOLON | 130 | COLONCOLON |
119 | } | 131 | } |
120 | _ => COLON, | 132 | _ => COLON, |
121 | } | 133 | }; |
122 | } | 134 | } |
123 | '=' => { | 135 | '=' => { |
124 | return match ptr.next() { | 136 | return match ptr.next() { |
@@ -131,7 +143,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
131 | FAT_ARROW | 143 | FAT_ARROW |
132 | } | 144 | } |
133 | _ => EQ, | 145 | _ => EQ, |
134 | } | 146 | }; |
135 | } | 147 | } |
136 | '!' => { | 148 | '!' => { |
137 | return match ptr.next() { | 149 | return match ptr.next() { |
@@ -140,7 +152,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
140 | NEQ | 152 | NEQ |
141 | } | 153 | } |
142 | _ => EXCL, | 154 | _ => EXCL, |
143 | } | 155 | }; |
144 | } | 156 | } |
145 | '-' => { | 157 | '-' => { |
146 | return if ptr.next_is('>') { | 158 | return if ptr.next_is('>') { |
@@ -148,7 +160,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
148 | THIN_ARROW | 160 | THIN_ARROW |
149 | } else { | 161 | } else { |
150 | MINUS | 162 | MINUS |
151 | } | 163 | }; |
152 | } | 164 | } |
153 | 165 | ||
154 | // If the character is an ident start not followed by another single | 166 | // If the character is an ident start not followed by another single |
@@ -202,7 +214,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
202 | return if c == '_' { UNDERSCORE } else { IDENT }; | 214 | return if c == '_' { UNDERSCORE } else { IDENT }; |
203 | } | 215 | } |
204 | ptr.bump_while(is_ident_continue); | 216 | ptr.bump_while(is_ident_continue); |
205 | if let Some(kind) = ident_to_keyword(ptr.current_token_text()) { | 217 | if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { |
206 | return kind; | 218 | return kind; |
207 | } | 219 | } |
208 | IDENT | 220 | IDENT |
diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs index 95e42246f..38eac9212 100644 --- a/src/lexer/numbers.rs +++ b/src/lexer/numbers.rs | |||
@@ -1,8 +1,7 @@ | |||
1 | use lexer::ptr::Ptr; | 1 | use lexer::ptr::Ptr; |
2 | use lexer::classes::*; | 2 | use lexer::classes::*; |
3 | 3 | ||
4 | use SyntaxKind; | 4 | use SyntaxKind::{self, *}; |
5 | use syntax_kinds::*; | ||
6 | 5 | ||
7 | pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | 6 | pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { |
8 | if c == '0' { | 7 | if c == '0' { |
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs index 00a84ec85..e3704fbb3 100644 --- a/src/lexer/strings.rs +++ b/src/lexer/strings.rs | |||
@@ -1,5 +1,4 @@ | |||
1 | use SyntaxKind; | 1 | use SyntaxKind::{self, *}; |
2 | use syntax_kinds::*; | ||
3 | 2 | ||
4 | use lexer::ptr::Ptr; | 3 | use lexer::ptr::Ptr; |
5 | 4 | ||
diff --git a/src/lib.rs b/src/lib.rs index 619ad62e5..4260e22e7 100644 --- a/src/lib.rs +++ b/src/lib.rs | |||
@@ -19,27 +19,36 @@ | |||
19 | extern crate unicode_xid; | 19 | extern crate unicode_xid; |
20 | extern crate text_unit; | 20 | extern crate text_unit; |
21 | 21 | ||
22 | mod tree; | ||
23 | mod lexer; | 22 | mod lexer; |
24 | mod parser; | 23 | mod parser; |
25 | mod yellow; | 24 | mod yellow; |
25 | mod syntax_kinds; | ||
26 | |||
27 | pub use { | ||
28 | text_unit::{TextRange, TextUnit}, | ||
29 | syntax_kinds::SyntaxKind, | ||
30 | yellow::{SyntaxNode}, | ||
31 | lexer::{tokenize, Token}, | ||
32 | }; | ||
33 | |||
34 | pub(crate) use { | ||
35 | yellow::SyntaxError | ||
36 | }; | ||
37 | |||
38 | pub fn parse(text: String) -> SyntaxNode { | ||
39 | let tokens = tokenize(&text); | ||
40 | parser::parse::<yellow::GreenBuilder>(text, &tokens) | ||
41 | } | ||
26 | 42 | ||
27 | pub mod syntax_kinds; | ||
28 | pub use text_unit::{TextRange, TextUnit}; | ||
29 | pub use tree::{SyntaxKind, Token}; | ||
30 | pub(crate) use tree::{Sink, GreenBuilder}; | ||
31 | pub use lexer::{next_token, tokenize}; | ||
32 | pub use yellow::SyntaxNode; | ||
33 | pub(crate) use yellow::SError; | ||
34 | pub use parser::{parse_green}; | ||
35 | 43 | ||
36 | /// Utilities for simple uses of the parser. | 44 | /// Utilities for simple uses of the parser. |
37 | pub mod utils { | 45 | pub mod utils { |
38 | use std::fmt::Write; | 46 | use std::{ |
47 | fmt::Write, | ||
48 | collections::BTreeSet | ||
49 | }; | ||
39 | 50 | ||
40 | use {SyntaxNode}; | 51 | use {SyntaxNode, SyntaxError}; |
41 | use std::collections::BTreeSet; | ||
42 | use SError; | ||
43 | 52 | ||
44 | /// Parse a file and create a string representation of the resulting parse tree. | 53 | /// Parse a file and create a string representation of the resulting parse tree. |
45 | pub fn dump_tree_green(syntax: &SyntaxNode) -> String { | 54 | pub fn dump_tree_green(syntax: &SyntaxNode) -> String { |
@@ -48,7 +57,7 @@ pub mod utils { | |||
48 | go(syntax, &mut result, 0, &mut errors); | 57 | go(syntax, &mut result, 0, &mut errors); |
49 | return result; | 58 | return result; |
50 | 59 | ||
51 | fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet<SError>) { | 60 | fn go(node: &SyntaxNode, buff: &mut String, level: usize, errors: &mut BTreeSet<SyntaxError>) { |
52 | buff.push_str(&String::from(" ").repeat(level)); | 61 | buff.push_str(&String::from(" ").repeat(level)); |
53 | write!(buff, "{:?}\n", node).unwrap(); | 62 | write!(buff, "{:?}\n", node).unwrap(); |
54 | let my_errors: Vec<_> = errors.iter().filter(|e| e.offset == node.range().start()) | 63 | let my_errors: Vec<_> = errors.iter().filter(|e| e.offset == node.range().start()) |
diff --git a/src/parser/event.rs b/src/parser/event.rs index 83039c664..a8d503b3d 100644 --- a/src/parser/event.rs +++ b/src/parser/event.rs | |||
@@ -1,8 +1,29 @@ | |||
1 | //! This module provides a way to construct a `File`. | ||
2 | //! It is intended to be completely decoupled from the | ||
3 | //! parser, so as to allow to evolve the tree representation | ||
4 | //! and the parser algorithm independently. | ||
5 | //! | ||
6 | //! The `Sink` trait is the bridge between the parser and the | ||
7 | //! tree builder: the parser produces a stream of events like | ||
8 | //! `start node`, `finish node`, and `FileBuilder` converts | ||
9 | //! this stream to a real tree. | ||
1 | use { | 10 | use { |
2 | Sink, SyntaxKind, Token, | 11 | TextUnit, |
3 | syntax_kinds::TOMBSTONE, | 12 | SyntaxKind::{self, TOMBSTONE}, |
13 | lexer::Token, | ||
4 | }; | 14 | }; |
5 | use super::is_insignificant; | 15 | |
16 | pub(crate) trait Sink { | ||
17 | type Tree; | ||
18 | |||
19 | fn new(text: String) -> Self; | ||
20 | |||
21 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); | ||
22 | fn start_internal(&mut self, kind: SyntaxKind); | ||
23 | fn finish_internal(&mut self); | ||
24 | fn error(&mut self, err: String); | ||
25 | fn finish(self) -> Self::Tree; | ||
26 | } | ||
6 | 27 | ||
7 | /// `Parser` produces a flat list of `Event`s. | 28 | /// `Parser` produces a flat list of `Event`s. |
8 | /// They are converted to a tree-structure in | 29 | /// They are converted to a tree-structure in |
@@ -67,7 +88,7 @@ pub(crate) enum Event { | |||
67 | }, | 88 | }, |
68 | } | 89 | } |
69 | 90 | ||
70 | pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>) { | 91 | pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec<Event>) { |
71 | let mut idx = 0; | 92 | let mut idx = 0; |
72 | 93 | ||
73 | let mut holes = Vec::new(); | 94 | let mut holes = Vec::new(); |
@@ -111,7 +132,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>) | |||
111 | &Event::Finish => { | 132 | &Event::Finish => { |
112 | while idx < tokens.len() { | 133 | while idx < tokens.len() { |
113 | let token = tokens[idx]; | 134 | let token = tokens[idx]; |
114 | if is_insignificant(token.kind) { | 135 | if token.kind.is_trivia() { |
115 | idx += 1; | 136 | idx += 1; |
116 | builder.leaf(token.kind, token.len); | 137 | builder.leaf(token.kind, token.len); |
117 | } else { | 138 | } else { |
@@ -128,7 +149,7 @@ pub(super) fn process(builder: &mut Sink, tokens: &[Token], events: Vec<Event>) | |||
128 | // this should be done in a sensible manner instead | 149 | // this should be done in a sensible manner instead |
129 | loop { | 150 | loop { |
130 | let token = tokens[idx]; | 151 | let token = tokens[idx]; |
131 | if !is_insignificant(token.kind) { | 152 | if !token.kind.is_trivia() { |
132 | break; | 153 | break; |
133 | } | 154 | } |
134 | builder.leaf(token.kind, token.len); | 155 | builder.leaf(token.kind, token.len); |
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index 23216452f..085e62d56 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs | |||
@@ -21,11 +21,6 @@ | |||
21 | //! After adding a new inline-test, run `cargo collect-tests` to extract | 21 | //! After adding a new inline-test, run `cargo collect-tests` to extract |
22 | //! it as a standalone text-fixture into `tests/data/parser/inline`, and | 22 | //! it as a standalone text-fixture into `tests/data/parser/inline`, and |
23 | //! run `cargo test` once to create the "gold" value. | 23 | //! run `cargo test` once to create the "gold" value. |
24 | use parser::parser::Parser; | ||
25 | use parser::token_set::TokenSet; | ||
26 | use SyntaxKind; | ||
27 | use syntax_kinds::*; | ||
28 | |||
29 | mod items; | 24 | mod items; |
30 | mod attributes; | 25 | mod attributes; |
31 | mod expressions; | 26 | mod expressions; |
@@ -34,6 +29,14 @@ mod patterns; | |||
34 | mod paths; | 29 | mod paths; |
35 | mod type_params; | 30 | mod type_params; |
36 | 31 | ||
32 | use { | ||
33 | SyntaxKind::{self, *}, | ||
34 | parser::{ | ||
35 | parser::Parser, | ||
36 | token_set::TokenSet | ||
37 | } | ||
38 | }; | ||
39 | |||
37 | pub(crate) fn file(p: &mut Parser) { | 40 | pub(crate) fn file(p: &mut Parser) { |
38 | let file = p.start(); | 41 | let file = p.start(); |
39 | p.eat(SHEBANG); | 42 | p.eat(SHEBANG); |
diff --git a/src/parser/input.rs b/src/parser/input.rs index 9b400b959..052981fbc 100644 --- a/src/parser/input.rs +++ b/src/parser/input.rs | |||
@@ -1,6 +1,8 @@ | |||
1 | use {SyntaxKind, TextRange, TextUnit, Token}; | 1 | use { |
2 | use syntax_kinds::EOF; | 2 | SyntaxKind, TextRange, TextUnit, |
3 | use super::is_insignificant; | 3 | SyntaxKind::EOF, |
4 | lexer::Token, | ||
5 | }; | ||
4 | 6 | ||
5 | use std::ops::{Add, AddAssign}; | 7 | use std::ops::{Add, AddAssign}; |
6 | 8 | ||
@@ -16,7 +18,7 @@ impl<'t> ParserInput<'t> { | |||
16 | let mut start_offsets = Vec::new(); | 18 | let mut start_offsets = Vec::new(); |
17 | let mut len = 0.into(); | 19 | let mut len = 0.into(); |
18 | for &token in raw_tokens.iter() { | 20 | for &token in raw_tokens.iter() { |
19 | if !is_insignificant(token.kind) { | 21 | if !token.kind.is_trivia() { |
20 | tokens.push(token); | 22 | tokens.push(token); |
21 | start_offsets.push(len); | 23 | start_offsets.push(len); |
22 | } | 24 | } |
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b7d5e5832..e72ab05af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs | |||
@@ -5,18 +5,16 @@ mod input; | |||
5 | mod event; | 5 | mod event; |
6 | mod grammar; | 6 | mod grammar; |
7 | 7 | ||
8 | use std::sync::Arc; | ||
9 | use { | 8 | use { |
10 | Token, | 9 | lexer::Token, |
11 | yellow::SyntaxNode, | 10 | parser::event::{process} |
12 | syntax_kinds::* | ||
13 | }; | 11 | }; |
14 | use GreenBuilder; | 12 | |
15 | use parser::event::process; | 13 | pub(crate) use self::event::Sink; |
16 | 14 | ||
17 | 15 | ||
18 | /// Parse a sequence of tokens into the representative node tree | 16 | /// Parse a sequence of tokens into the representative node tree |
19 | pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode { | 17 | pub(crate) fn parse<S: Sink>(text: String, tokens: &[Token]) -> S::Tree { |
20 | let events = { | 18 | let events = { |
21 | let input = input::ParserInput::new(&text, tokens); | 19 | let input = input::ParserInput::new(&text, tokens); |
22 | let parser_impl = parser::imp::ParserImpl::new(&input); | 20 | let parser_impl = parser::imp::ParserImpl::new(&input); |
@@ -24,15 +22,7 @@ pub fn parse_green(text: String, tokens: &[Token]) -> SyntaxNode { | |||
24 | grammar::file(&mut parser); | 22 | grammar::file(&mut parser); |
25 | parser.0.into_events() | 23 | parser.0.into_events() |
26 | }; | 24 | }; |
27 | let mut builder = GreenBuilder::new(text); | 25 | let mut sink = S::new(text); |
28 | process(&mut builder, tokens, events); | 26 | process(&mut sink, tokens, events); |
29 | let (green, errors) = builder.finish(); | 27 | sink.finish() |
30 | SyntaxNode::new(Arc::new(green), errors) | ||
31 | } | ||
32 | |||
33 | fn is_insignificant(kind: SyntaxKind) -> bool { | ||
34 | match kind { | ||
35 | WHITESPACE | COMMENT => true, | ||
36 | _ => false, | ||
37 | } | ||
38 | } | 28 | } |
diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs index f2641c388..38237ac06 100644 --- a/src/parser/parser/imp.rs +++ b/src/parser/parser/imp.rs | |||
@@ -1,8 +1,7 @@ | |||
1 | use parser::input::{InputPosition, ParserInput}; | 1 | use parser::input::{InputPosition, ParserInput}; |
2 | use parser::event::Event; | 2 | use parser::event::Event; |
3 | 3 | ||
4 | use SyntaxKind; | 4 | use SyntaxKind::{self, EOF, TOMBSTONE}; |
5 | use syntax_kinds::{EOF, TOMBSTONE}; | ||
6 | 5 | ||
7 | /// Implementation details of `Parser`, extracted | 6 | /// Implementation details of `Parser`, extracted |
8 | /// to a separate struct in order not to pollute | 7 | /// to a separate struct in order not to pollute |
diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs index 58f820738..0930ff9e4 100644 --- a/src/parser/parser/mod.rs +++ b/src/parser/parser/mod.rs | |||
@@ -1,5 +1,4 @@ | |||
1 | use SyntaxKind; | 1 | use SyntaxKind::{self, ERROR}; |
2 | use syntax_kinds::ERROR; | ||
3 | 2 | ||
4 | pub(super) mod imp; | 3 | pub(super) mod imp; |
5 | use self::imp::ParserImpl; | 4 | use self::imp::ParserImpl; |
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds/generated.rs index 1cc29bb61..d332fd02e 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds/generated.rs | |||
@@ -1,7 +1,7 @@ | |||
1 | #![allow(bad_style, missing_docs, unreachable_pub)] | 1 | #![allow(bad_style, missing_docs, unreachable_pub)] |
2 | #![cfg_attr(rustfmt, rustfmt_skip)] | 2 | #![cfg_attr(rustfmt, rustfmt_skip)] |
3 | //! Generated from grammar.ron | 3 | //! Generated from grammar.ron |
4 | use tree::SyntaxInfo; | 4 | use super::SyntaxInfo; |
5 | 5 | ||
6 | /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. | 6 | /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. |
7 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | 7 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
@@ -288,39 +288,39 @@ impl SyntaxKind { | |||
288 | EOF => &SyntaxInfo { name: "EOF" }, | 288 | EOF => &SyntaxInfo { name: "EOF" }, |
289 | } | 289 | } |
290 | } | 290 | } |
291 | } | 291 | pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> { |
292 | 292 | match ident { | |
293 | pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> { | 293 | "use" => Some(USE_KW), |
294 | match ident { | 294 | "fn" => Some(FN_KW), |
295 | "use" => Some(USE_KW), | 295 | "struct" => Some(STRUCT_KW), |
296 | "fn" => Some(FN_KW), | 296 | "enum" => Some(ENUM_KW), |
297 | "struct" => Some(STRUCT_KW), | 297 | "trait" => Some(TRAIT_KW), |
298 | "enum" => Some(ENUM_KW), | 298 | "impl" => Some(IMPL_KW), |
299 | "trait" => Some(TRAIT_KW), | 299 | "true" => Some(TRUE_KW), |
300 | "impl" => Some(IMPL_KW), | 300 | "false" => Some(FALSE_KW), |
301 | "true" => Some(TRUE_KW), | 301 | "as" => Some(AS_KW), |
302 | "false" => Some(FALSE_KW), | 302 | "extern" => Some(EXTERN_KW), |
303 | "as" => Some(AS_KW), | 303 | "crate" => Some(CRATE_KW), |
304 | "extern" => Some(EXTERN_KW), | 304 | "mod" => Some(MOD_KW), |
305 | "crate" => Some(CRATE_KW), | 305 | "pub" => Some(PUB_KW), |
306 | "mod" => Some(MOD_KW), | 306 | "self" => Some(SELF_KW), |
307 | "pub" => Some(PUB_KW), | 307 | "super" => Some(SUPER_KW), |
308 | "self" => Some(SELF_KW), | 308 | "in" => Some(IN_KW), |
309 | "super" => Some(SUPER_KW), | 309 | "where" => Some(WHERE_KW), |
310 | "in" => Some(IN_KW), | 310 | "for" => Some(FOR_KW), |
311 | "where" => Some(WHERE_KW), | 311 | "loop" => Some(LOOP_KW), |
312 | "for" => Some(FOR_KW), | 312 | "while" => Some(WHILE_KW), |
313 | "loop" => Some(LOOP_KW), | 313 | "if" => Some(IF_KW), |
314 | "while" => Some(WHILE_KW), | 314 | "match" => Some(MATCH_KW), |
315 | "if" => Some(IF_KW), | 315 | "const" => Some(CONST_KW), |
316 | "match" => Some(MATCH_KW), | 316 | "static" => Some(STATIC_KW), |
317 | "const" => Some(CONST_KW), | 317 | "mut" => Some(MUT_KW), |
318 | "static" => Some(STATIC_KW), | 318 | "unsafe" => Some(UNSAFE_KW), |
319 | "mut" => Some(MUT_KW), | 319 | "type" => Some(TYPE_KW), |
320 | "unsafe" => Some(UNSAFE_KW), | 320 | "ref" => Some(REF_KW), |
321 | "type" => Some(TYPE_KW), | 321 | "let" => Some(LET_KW), |
322 | "ref" => Some(REF_KW), | 322 | _ => None, |
323 | "let" => Some(LET_KW), | 323 | } |
324 | _ => None, | ||
325 | } | 324 | } |
326 | } | 325 | } |
326 | |||
diff --git a/src/syntax_kinds/mod.rs b/src/syntax_kinds/mod.rs new file mode 100644 index 000000000..a8e9bfe29 --- /dev/null +++ b/src/syntax_kinds/mod.rs | |||
@@ -0,0 +1,27 @@ | |||
1 | mod generated; | ||
2 | |||
3 | use std::fmt; | ||
4 | use ::{SyntaxKind::*}; | ||
5 | |||
6 | pub use self::generated::SyntaxKind; | ||
7 | |||
8 | impl fmt::Debug for SyntaxKind { | ||
9 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
10 | let name = self.info().name; | ||
11 | f.write_str(name) | ||
12 | } | ||
13 | } | ||
14 | |||
15 | pub(crate) struct SyntaxInfo { | ||
16 | pub name: &'static str, | ||
17 | } | ||
18 | |||
19 | |||
20 | impl SyntaxKind { | ||
21 | pub(crate) fn is_trivia(self: SyntaxKind) -> bool { | ||
22 | match self { | ||
23 | WHITESPACE | COMMENT | DOC_COMMENT => true, | ||
24 | _ => false, | ||
25 | } | ||
26 | } | ||
27 | } | ||
diff --git a/src/tree/mod.rs b/src/tree/mod.rs deleted file mode 100644 index efba82825..000000000 --- a/src/tree/mod.rs +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | mod file_builder; | ||
2 | |||
3 | use ::{TextUnit}; | ||
4 | use std::{fmt}; | ||
5 | pub(crate) use self::file_builder::{Sink, GreenBuilder}; | ||
6 | |||
7 | pub use syntax_kinds::SyntaxKind; | ||
8 | |||
9 | impl fmt::Debug for SyntaxKind { | ||
10 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
11 | let name = self.info().name; | ||
12 | f.write_str(name) | ||
13 | } | ||
14 | } | ||
15 | |||
16 | pub(crate) struct SyntaxInfo { | ||
17 | pub name: &'static str, | ||
18 | } | ||
19 | |||
20 | /// A token of Rust source. | ||
21 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
22 | pub struct Token { | ||
23 | /// The kind of token. | ||
24 | pub kind: SyntaxKind, | ||
25 | /// The length of the token. | ||
26 | pub len: TextUnit, | ||
27 | } | ||
diff --git a/src/tree/file_builder.rs b/src/yellow/builder.rs index f5d1751f9..346d561cd 100644 --- a/src/tree/file_builder.rs +++ b/src/yellow/builder.rs | |||
@@ -1,36 +1,26 @@ | |||
1 | //! This module provides a way to construct a `File`. | ||
2 | //! It is intended to be completely decoupled from the | ||
3 | //! parser, so as to allow to evolve the tree representation | ||
4 | //! and the parser algorithm independently. | ||
5 | //! | ||
6 | //! The `Sink` trait is the bridge between the parser and the | ||
7 | //! tree builder: the parser produces a stream of events like | ||
8 | //! `start node`, `finish node`, and `FileBuilder` converts | ||
9 | //! this stream to a real tree. | ||
10 | use std::sync::Arc; | 1 | use std::sync::Arc; |
11 | use { | 2 | use { |
12 | SyntaxKind, TextRange, TextUnit, | 3 | SyntaxKind, TextRange, TextUnit, |
13 | yellow::GreenNode | 4 | yellow::{SyntaxNode, GreenNode, SyntaxError}, |
5 | parser::Sink | ||
14 | }; | 6 | }; |
15 | use SError; | ||
16 | |||
17 | pub(crate) trait Sink { | ||
18 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); | ||
19 | fn start_internal(&mut self, kind: SyntaxKind); | ||
20 | fn finish_internal(&mut self); | ||
21 | fn error(&mut self, err: String); | ||
22 | } | ||
23 | 7 | ||
24 | pub(crate) struct GreenBuilder { | 8 | pub(crate) struct GreenBuilder { |
25 | text: String, | 9 | text: String, |
26 | stack: Vec<GreenNode>, | 10 | stack: Vec<GreenNode>, |
27 | pos: TextUnit, | 11 | pos: TextUnit, |
28 | root: Option<GreenNode>, | 12 | root: Option<GreenNode>, |
29 | errors: Vec<SError>, | 13 | errors: Vec<SyntaxError>, |
30 | } | 14 | } |
31 | 15 | ||
32 | impl GreenBuilder { | 16 | impl GreenBuilder { |
33 | pub(crate) fn new(text: String) -> GreenBuilder { | 17 | |
18 | } | ||
19 | |||
20 | impl Sink for GreenBuilder { | ||
21 | type Tree = SyntaxNode; | ||
22 | |||
23 | fn new(text: String) -> Self { | ||
34 | GreenBuilder { | 24 | GreenBuilder { |
35 | text, | 25 | text, |
36 | stack: Vec::new(), | 26 | stack: Vec::new(), |
@@ -40,12 +30,6 @@ impl GreenBuilder { | |||
40 | } | 30 | } |
41 | } | 31 | } |
42 | 32 | ||
43 | pub(crate) fn finish(self) -> (GreenNode, Vec<SError>) { | ||
44 | (self.root.unwrap(), self.errors) | ||
45 | } | ||
46 | } | ||
47 | |||
48 | impl Sink for GreenBuilder { | ||
49 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) { | 33 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) { |
50 | let range = TextRange::offset_len(self.pos, len); | 34 | let range = TextRange::offset_len(self.pos, len); |
51 | self.pos += len; | 35 | self.pos += len; |
@@ -73,15 +57,12 @@ impl Sink for GreenBuilder { | |||
73 | } | 57 | } |
74 | 58 | ||
75 | fn error(&mut self, message: String) { | 59 | fn error(&mut self, message: String) { |
76 | self.errors.push(SError { message, offset: self.pos }) | 60 | self.errors.push(SyntaxError { message, offset: self.pos }) |
77 | } | 61 | } |
78 | } | 62 | |
79 | impl SyntaxKind { | 63 | fn finish(self) -> SyntaxNode { |
80 | fn is_trivia(self) -> bool { | 64 | SyntaxNode::new(Arc::new(self.root.unwrap()), self.errors) |
81 | match self { | ||
82 | SyntaxKind::WHITESPACE | SyntaxKind::DOC_COMMENT | SyntaxKind::COMMENT => true, | ||
83 | _ => false | ||
84 | } | ||
85 | } | 65 | } |
86 | } | 66 | } |
87 | 67 | ||
68 | |||
diff --git a/src/yellow/mod.rs b/src/yellow/mod.rs index 88d88e226..9e64d042f 100644 --- a/src/yellow/mod.rs +++ b/src/yellow/mod.rs | |||
@@ -1,6 +1,7 @@ | |||
1 | mod green; | 1 | mod green; |
2 | mod red; | 2 | mod red; |
3 | mod syntax; | 3 | mod syntax; |
4 | mod builder; | ||
4 | 5 | ||
5 | use std::{ | 6 | use std::{ |
6 | sync::{Arc, Weak}, | 7 | sync::{Arc, Weak}, |
@@ -9,7 +10,8 @@ use std::{ | |||
9 | pub(crate) use self::{ | 10 | pub(crate) use self::{ |
10 | green::{GreenNode, TextLen}, | 11 | green::{GreenNode, TextLen}, |
11 | red::RedNode, | 12 | red::RedNode, |
12 | syntax::SError, | 13 | syntax::SyntaxError, |
14 | builder::GreenBuilder, | ||
13 | }; | 15 | }; |
14 | pub use self::syntax::SyntaxNode; | 16 | pub use self::syntax::SyntaxNode; |
15 | 17 | ||
diff --git a/src/yellow/syntax.rs b/src/yellow/syntax.rs index 7b1a05cd9..78fa5bf95 100644 --- a/src/yellow/syntax.rs +++ b/src/yellow/syntax.rs | |||
@@ -4,7 +4,8 @@ use std::{ | |||
4 | }; | 4 | }; |
5 | 5 | ||
6 | use { | 6 | use { |
7 | TextRange, TextUnit, SyntaxKind, | 7 | TextRange, TextUnit, |
8 | SyntaxKind::{self, *}, | ||
8 | yellow::{Ptr, RedNode, GreenNode, TextLen}, | 9 | yellow::{Ptr, RedNode, GreenNode, TextLen}, |
9 | }; | 10 | }; |
10 | 11 | ||
@@ -18,17 +19,17 @@ pub struct SyntaxNode { | |||
18 | #[derive(Clone)] | 19 | #[derive(Clone)] |
19 | pub struct SyntaxRoot { | 20 | pub struct SyntaxRoot { |
20 | red: Arc<RedNode>, | 21 | red: Arc<RedNode>, |
21 | pub(crate) errors: Arc<Vec<SError>>, | 22 | pub(crate) errors: Arc<Vec<SyntaxError>>, |
22 | } | 23 | } |
23 | 24 | ||
24 | #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] | 25 | #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] |
25 | pub(crate) struct SError { | 26 | pub(crate) struct SyntaxError { |
26 | pub(crate) message: String, | 27 | pub(crate) message: String, |
27 | pub(crate) offset: TextUnit, | 28 | pub(crate) offset: TextUnit, |
28 | } | 29 | } |
29 | 30 | ||
30 | impl SyntaxNode { | 31 | impl SyntaxNode { |
31 | pub(crate) fn new(root: Arc<GreenNode>, errors: Vec<SError>) -> SyntaxNode { | 32 | pub(crate) fn new(root: Arc<GreenNode>, errors: Vec<SyntaxError>) -> SyntaxNode { |
32 | let root = Arc::new(RedNode::new_root(root)); | 33 | let root = Arc::new(RedNode::new_root(root)); |
33 | let red = Ptr::new(&root); | 34 | let red = Ptr::new(&root); |
34 | let root = SyntaxRoot { red: root, errors: Arc::new(errors) }; | 35 | let root = SyntaxRoot { red: root, errors: Arc::new(errors) }; |
@@ -123,7 +124,6 @@ impl fmt::Debug for SyntaxNode { | |||
123 | } | 124 | } |
124 | 125 | ||
125 | fn has_short_text(kind: SyntaxKind) -> bool { | 126 | fn has_short_text(kind: SyntaxKind) -> bool { |
126 | use syntax_kinds::*; | ||
127 | match kind { | 127 | match kind { |
128 | IDENT | LIFETIME => true, | 128 | IDENT | LIFETIME => true, |
129 | _ => false, | 129 | _ => false, |
diff --git a/tests/parser.rs b/tests/parser.rs index 1b86fe55a..3b6670cb0 100644 --- a/tests/parser.rs +++ b/tests/parser.rs | |||
@@ -1,15 +1,14 @@ | |||
1 | extern crate libsyntax2; | 1 | extern crate libsyntax2; |
2 | extern crate testutils; | 2 | extern crate testutils; |
3 | 3 | ||
4 | use libsyntax2::{tokenize, parse_green}; | 4 | use libsyntax2::{parse}; |
5 | use libsyntax2::utils::{dump_tree_green}; | 5 | use libsyntax2::utils::{dump_tree_green}; |
6 | use testutils::dir_tests; | 6 | use testutils::dir_tests; |
7 | 7 | ||
8 | #[test] | 8 | #[test] |
9 | fn parser_tests() { | 9 | fn parser_tests() { |
10 | dir_tests(&["parser/inline", "parser/ok", "parser/err"], |text| { | 10 | dir_tests(&["parser/inline", "parser/ok", "parser/err"], |text| { |
11 | let tokens = tokenize(text); | 11 | let file = parse(text.to_string()); |
12 | let file = parse_green(text.to_string(), &tokens); | ||
13 | dump_tree_green(&file) | 12 | dump_tree_green(&file) |
14 | }) | 13 | }) |
15 | } | 14 | } |
diff --git a/tools/src/bin/gen.rs b/tools/src/bin/gen.rs index 7cb164316..e772922ba 100644 --- a/tools/src/bin/gen.rs +++ b/tools/src/bin/gen.rs | |||
@@ -36,7 +36,7 @@ impl Grammar { | |||
36 | acc.push_str("#![allow(bad_style, missing_docs, unreachable_pub)]\n"); | 36 | acc.push_str("#![allow(bad_style, missing_docs, unreachable_pub)]\n"); |
37 | acc.push_str("#![cfg_attr(rustfmt, rustfmt_skip)]\n"); | 37 | acc.push_str("#![cfg_attr(rustfmt, rustfmt_skip)]\n"); |
38 | acc.push_str("//! Generated from grammar.ron\n"); | 38 | acc.push_str("//! Generated from grammar.ron\n"); |
39 | acc.push_str("use tree::SyntaxInfo;\n"); | 39 | acc.push_str("use super::SyntaxInfo;\n"); |
40 | acc.push_str("\n"); | 40 | acc.push_str("\n"); |
41 | 41 | ||
42 | let syntax_kinds: Vec<String> = self.tokens | 42 | let syntax_kinds: Vec<String> = self.tokens |
@@ -82,19 +82,19 @@ impl Grammar { | |||
82 | acc.push_str(" EOF => &SyntaxInfo { name: \"EOF\" },\n"); | 82 | acc.push_str(" EOF => &SyntaxInfo { name: \"EOF\" },\n"); |
83 | acc.push_str(" }\n"); | 83 | acc.push_str(" }\n"); |
84 | acc.push_str(" }\n"); | 84 | acc.push_str(" }\n"); |
85 | acc.push_str("}\n"); | ||
86 | acc.push_str("\n"); | ||
87 | 85 | ||
88 | // fn ident_to_keyword | 86 | // fn from_keyword |
89 | acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n"); | 87 | acc.push_str(" pub(crate) fn from_keyword(ident: &str) -> Option<SyntaxKind> {\n"); |
90 | acc.push_str(" match ident {\n"); | 88 | acc.push_str(" match ident {\n"); |
91 | // NB: no contextual_keywords here! | 89 | // NB: no contextual_keywords here! |
92 | for kw in self.keywords.iter() { | 90 | for kw in self.keywords.iter() { |
93 | write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); | 91 | write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); |
94 | } | 92 | } |
95 | acc.push_str(" _ => None,\n"); | 93 | acc.push_str(" _ => None,\n"); |
94 | acc.push_str(" }\n"); | ||
96 | acc.push_str(" }\n"); | 95 | acc.push_str(" }\n"); |
97 | acc.push_str("}\n"); | 96 | acc.push_str("}\n"); |
97 | acc.push_str("\n"); | ||
98 | acc | 98 | acc |
99 | } | 99 | } |
100 | } | 100 | } |
@@ -104,7 +104,7 @@ fn grammar_file() -> PathBuf { | |||
104 | } | 104 | } |
105 | 105 | ||
106 | fn generated_file() -> PathBuf { | 106 | fn generated_file() -> PathBuf { |
107 | base_dir().join("src/syntax_kinds.rs") | 107 | base_dir().join("src/syntax_kinds/generated.rs") |
108 | } | 108 | } |
109 | 109 | ||
110 | fn scream(word: &str) -> String { | 110 | fn scream(word: &str) -> String { |
diff --git a/tools/src/bin/parse.rs b/tools/src/bin/parse.rs index 5e4dc261f..cb3414711 100644 --- a/tools/src/bin/parse.rs +++ b/tools/src/bin/parse.rs | |||
@@ -2,13 +2,12 @@ extern crate libsyntax2; | |||
2 | 2 | ||
3 | use std::io::Read; | 3 | use std::io::Read; |
4 | 4 | ||
5 | use libsyntax2::{parse_green, tokenize}; | 5 | use libsyntax2::{parse}; |
6 | use libsyntax2::utils::dump_tree_green; | 6 | use libsyntax2::utils::dump_tree_green; |
7 | 7 | ||
8 | fn main() { | 8 | fn main() { |
9 | let text = read_input(); | 9 | let text = read_input(); |
10 | let tokens = tokenize(&text); | 10 | let file = parse(text); |
11 | let file = parse_green(text, &tokens); | ||
12 | let tree = dump_tree_green(&file); | 11 | let tree = dump_tree_green(&file); |
13 | println!("{}", tree); | 12 | println!("{}", tree); |
14 | } | 13 | } |