diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-20 20:33:40 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-20 20:33:40 +0000 |
commit | c84561bb624280b84eb2fe6c6b2a6b9fe3f1dbf7 (patch) | |
tree | e47aa900bcffffded370b68b2e50604199b491e3 /crates/ra_syntax | |
parent | 96899f8278b787280bd07d9ac9dce29a610ce40d (diff) | |
parent | 882c47f1870f15cb2aaad8871ccbad1c51520f49 (diff) |
Merge #863
863: Token source r=matklad a=matklad
Some reshuffling of parser's API with the eye towards extracting parse **without** syntax tree into a separate crate, to be used with macro expansion
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax')
-rw-r--r-- | crates/ra_syntax/src/parsing.rs | 67 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/builder.rs | 21 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/event.rs (renamed from crates/ra_syntax/src/parsing/parser_impl/event.rs) | 19 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/grammar.rs | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/input.rs | 68 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/parser.rs (renamed from crates/ra_syntax/src/parsing/parser_api.rs) | 109 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_impl.rs | 200 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_impl/input.rs | 104 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/reparsing.rs | 8 | ||||
-rw-r--r-- | crates/ra_syntax/src/syntax_error.rs | 5 |
10 files changed, 245 insertions, 358 deletions
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 761accd7b..138d1394a 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs | |||
@@ -2,24 +2,77 @@ | |||
2 | mod token_set; | 2 | mod token_set; |
3 | mod builder; | 3 | mod builder; |
4 | mod lexer; | 4 | mod lexer; |
5 | mod parser_impl; | 5 | mod event; |
6 | mod parser_api; | 6 | mod input; |
7 | mod parser; | ||
7 | mod grammar; | 8 | mod grammar; |
8 | mod reparsing; | 9 | mod reparsing; |
9 | 10 | ||
10 | use crate::{ | 11 | use crate::{ |
11 | SyntaxError, | 12 | SyntaxKind, SmolStr, SyntaxError, |
12 | parsing::builder::GreenBuilder, | 13 | parsing::{ |
14 | builder::GreenBuilder, | ||
15 | input::ParserInput, | ||
16 | event::EventProcessor, | ||
17 | parser::Parser, | ||
18 | }, | ||
13 | syntax_node::GreenNode, | 19 | syntax_node::GreenNode, |
14 | }; | 20 | }; |
15 | 21 | ||
16 | pub use self::lexer::{tokenize, Token}; | 22 | pub use self::lexer::{tokenize, Token}; |
17 | 23 | ||
24 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
25 | pub struct ParseError(pub String); | ||
26 | |||
18 | pub(crate) use self::reparsing::incremental_reparse; | 27 | pub(crate) use self::reparsing::incremental_reparse; |
19 | 28 | ||
20 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { | 29 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { |
21 | let tokens = tokenize(&text); | 30 | let tokens = tokenize(&text); |
22 | let (green, errors) = | 31 | parse_with(GreenBuilder::default(), text, &tokens, grammar::root) |
23 | parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); | 32 | } |
24 | (green, errors) | 33 | |
34 | fn parse_with<S: TreeSink>( | ||
35 | tree_sink: S, | ||
36 | text: &str, | ||
37 | tokens: &[Token], | ||
38 | f: fn(&mut Parser), | ||
39 | ) -> S::Tree { | ||
40 | let mut events = { | ||
41 | let input = ParserInput::new(text, &tokens); | ||
42 | let mut p = Parser::new(&input); | ||
43 | f(&mut p); | ||
44 | p.finish() | ||
45 | }; | ||
46 | EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() | ||
47 | } | ||
48 | |||
49 | /// `TreeSink` abstracts details of a particular syntax tree implementation. | ||
50 | trait TreeSink { | ||
51 | type Tree; | ||
52 | |||
53 | /// Adds new leaf to the current branch. | ||
54 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); | ||
55 | |||
56 | /// Start new branch and make it current. | ||
57 | fn start_branch(&mut self, kind: SyntaxKind); | ||
58 | |||
59 | /// Finish current branch and restore previous | ||
60 | /// branch as current. | ||
61 | fn finish_branch(&mut self); | ||
62 | |||
63 | fn error(&mut self, error: ParseError); | ||
64 | |||
65 | /// Complete tree building. Make sure that | ||
66 | /// `start_branch` and `finish_branch` calls | ||
67 | /// are paired! | ||
68 | fn finish(self) -> Self::Tree; | ||
69 | } | ||
70 | |||
71 | /// `TokenSource` abstracts the source of the tokens parser operates one. | ||
72 | /// | ||
73 | /// Hopefully this will allow us to treat text and token trees in the same way! | ||
74 | trait TokenSource { | ||
75 | fn token_kind(&self, pos: usize) -> SyntaxKind; | ||
76 | fn is_token_joint_to_next(&self, pos: usize) -> bool; | ||
77 | fn is_keyword(&self, pos: usize, kw: &str) -> bool; | ||
25 | } | 78 | } |
diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 9090c60c2..ee0e2cce7 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs | |||
@@ -1,26 +1,32 @@ | |||
1 | use crate::{ | 1 | use crate::{ |
2 | parsing::parser_impl::Sink, | 2 | SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, |
3 | parsing::{TreeSink, ParseError}, | ||
3 | syntax_node::{GreenNode, RaTypes}, | 4 | syntax_node::{GreenNode, RaTypes}, |
4 | SmolStr, SyntaxKind, SyntaxError, | ||
5 | }; | 5 | }; |
6 | 6 | ||
7 | use rowan::GreenNodeBuilder; | 7 | use rowan::GreenNodeBuilder; |
8 | 8 | ||
9 | pub(crate) struct GreenBuilder { | 9 | pub(crate) struct GreenBuilder { |
10 | text_pos: TextUnit, | ||
10 | errors: Vec<SyntaxError>, | 11 | errors: Vec<SyntaxError>, |
11 | inner: GreenNodeBuilder<RaTypes>, | 12 | inner: GreenNodeBuilder<RaTypes>, |
12 | } | 13 | } |
13 | 14 | ||
14 | impl GreenBuilder { | 15 | impl Default for GreenBuilder { |
15 | pub(crate) fn new() -> GreenBuilder { | 16 | fn default() -> GreenBuilder { |
16 | GreenBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } | 17 | GreenBuilder { |
18 | text_pos: TextUnit::default(), | ||
19 | errors: Vec::new(), | ||
20 | inner: GreenNodeBuilder::new(), | ||
21 | } | ||
17 | } | 22 | } |
18 | } | 23 | } |
19 | 24 | ||
20 | impl Sink for GreenBuilder { | 25 | impl TreeSink for GreenBuilder { |
21 | type Tree = (GreenNode, Vec<SyntaxError>); | 26 | type Tree = (GreenNode, Vec<SyntaxError>); |
22 | 27 | ||
23 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { | 28 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { |
29 | self.text_pos += TextUnit::of_str(text.as_str()); | ||
24 | self.inner.leaf(kind, text); | 30 | self.inner.leaf(kind, text); |
25 | } | 31 | } |
26 | 32 | ||
@@ -32,7 +38,8 @@ impl Sink for GreenBuilder { | |||
32 | self.inner.finish_internal(); | 38 | self.inner.finish_internal(); |
33 | } | 39 | } |
34 | 40 | ||
35 | fn error(&mut self, error: SyntaxError) { | 41 | fn error(&mut self, error: ParseError) { |
42 | let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); | ||
36 | self.errors.push(error) | 43 | self.errors.push(error) |
37 | } | 44 | } |
38 | 45 | ||
diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/event.rs index 2ddbdd34d..f6f020eab 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs | |||
@@ -3,7 +3,7 @@ | |||
3 | //! parser, so as to allow to evolve the tree representation | 3 | //! parser, so as to allow to evolve the tree representation |
4 | //! and the parser algorithm independently. | 4 | //! and the parser algorithm independently. |
5 | //! | 5 | //! |
6 | //! The `Sink` trait is the bridge between the parser and the | 6 | //! The `TreeSink` trait is the bridge between the parser and the |
7 | //! tree builder: the parser produces a stream of events like | 7 | //! tree builder: the parser produces a stream of events like |
8 | //! `start node`, `finish node`, and `FileBuilder` converts | 8 | //! `start node`, `finish node`, and `FileBuilder` converts |
9 | //! this stream to a real tree. | 9 | //! this stream to a real tree. |
@@ -13,14 +13,9 @@ use crate::{ | |||
13 | SmolStr, | 13 | SmolStr, |
14 | SyntaxKind::{self, *}, | 14 | SyntaxKind::{self, *}, |
15 | TextRange, TextUnit, | 15 | TextRange, TextUnit, |
16 | syntax_error::{ | ||
17 | ParseError, | ||
18 | SyntaxError, | ||
19 | SyntaxErrorKind, | ||
20 | }, | ||
21 | parsing::{ | 16 | parsing::{ |
17 | ParseError, TreeSink, | ||
22 | lexer::Token, | 18 | lexer::Token, |
23 | parser_impl::Sink, | ||
24 | }, | 19 | }, |
25 | }; | 20 | }; |
26 | 21 | ||
@@ -93,7 +88,7 @@ impl Event { | |||
93 | } | 88 | } |
94 | } | 89 | } |
95 | 90 | ||
96 | pub(super) struct EventProcessor<'a, S: Sink> { | 91 | pub(super) struct EventProcessor<'a, S: TreeSink> { |
97 | sink: S, | 92 | sink: S, |
98 | text_pos: TextUnit, | 93 | text_pos: TextUnit, |
99 | text: &'a str, | 94 | text: &'a str, |
@@ -102,7 +97,7 @@ pub(super) struct EventProcessor<'a, S: Sink> { | |||
102 | events: &'a mut [Event], | 97 | events: &'a mut [Event], |
103 | } | 98 | } |
104 | 99 | ||
105 | impl<'a, S: Sink> EventProcessor<'a, S> { | 100 | impl<'a, S: TreeSink> EventProcessor<'a, S> { |
106 | pub(super) fn new( | 101 | pub(super) fn new( |
107 | sink: S, | 102 | sink: S, |
108 | text: &'a str, | 103 | text: &'a str, |
@@ -113,7 +108,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> { | |||
113 | } | 108 | } |
114 | 109 | ||
115 | /// Generate the syntax tree with the control of events. | 110 | /// Generate the syntax tree with the control of events. |
116 | pub(super) fn process(mut self) -> S { | 111 | pub(crate) fn process(mut self) -> S { |
117 | let mut forward_parents = Vec::new(); | 112 | let mut forward_parents = Vec::new(); |
118 | 113 | ||
119 | for i in 0..self.events.len() { | 114 | for i in 0..self.events.len() { |
@@ -159,9 +154,7 @@ impl<'a, S: Sink> EventProcessor<'a, S> { | |||
159 | .sum::<TextUnit>(); | 154 | .sum::<TextUnit>(); |
160 | self.leaf(kind, len, n_raw_tokens); | 155 | self.leaf(kind, len, n_raw_tokens); |
161 | } | 156 | } |
162 | Event::Error { msg } => self | 157 | Event::Error { msg } => self.sink.error(msg), |
163 | .sink | ||
164 | .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), | ||
165 | } | 158 | } |
166 | } | 159 | } |
167 | self.sink | 160 | self.sink |
diff --git a/crates/ra_syntax/src/parsing/grammar.rs b/crates/ra_syntax/src/parsing/grammar.rs index bcdcd9f57..7ca9c223c 100644 --- a/crates/ra_syntax/src/parsing/grammar.rs +++ b/crates/ra_syntax/src/parsing/grammar.rs | |||
@@ -41,7 +41,7 @@ use crate::{ | |||
41 | SyntaxKind::{self, *}, | 41 | SyntaxKind::{self, *}, |
42 | parsing::{ | 42 | parsing::{ |
43 | token_set::TokenSet, | 43 | token_set::TokenSet, |
44 | parser_api::{CompletedMarker, Marker, Parser} | 44 | parser::{CompletedMarker, Marker, Parser} |
45 | }, | 45 | }, |
46 | }; | 46 | }; |
47 | 47 | ||
diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs new file mode 100644 index 000000000..96c03bb11 --- /dev/null +++ b/crates/ra_syntax/src/parsing/input.rs | |||
@@ -0,0 +1,68 @@ | |||
1 | use crate::{ | ||
2 | SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, | ||
3 | parsing::{ | ||
4 | TokenSource, | ||
5 | lexer::Token, | ||
6 | }, | ||
7 | }; | ||
8 | |||
9 | impl<'t> TokenSource for ParserInput<'t> { | ||
10 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
11 | if !(pos < self.tokens.len()) { | ||
12 | return EOF; | ||
13 | } | ||
14 | self.tokens[pos].kind | ||
15 | } | ||
16 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
17 | if !(pos + 1 < self.tokens.len()) { | ||
18 | return true; | ||
19 | } | ||
20 | self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1] | ||
21 | } | ||
22 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
23 | if !(pos < self.tokens.len()) { | ||
24 | return false; | ||
25 | } | ||
26 | let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len); | ||
27 | |||
28 | self.text[range] == *kw | ||
29 | } | ||
30 | } | ||
31 | |||
32 | pub(crate) struct ParserInput<'t> { | ||
33 | text: &'t str, | ||
34 | /// start position of each token(expect whitespace and comment) | ||
35 | /// ```non-rust | ||
36 | /// struct Foo; | ||
37 | /// ^------^--- | ||
38 | /// | | ^- | ||
39 | /// 0 7 10 | ||
40 | /// ``` | ||
41 | /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` | ||
42 | start_offsets: Vec<TextUnit>, | ||
43 | /// non-whitespace/comment tokens | ||
44 | /// ```non-rust | ||
45 | /// struct Foo {} | ||
46 | /// ^^^^^^ ^^^ ^^ | ||
47 | /// ``` | ||
48 | /// tokens: `[struct, Foo, {, }]` | ||
49 | tokens: Vec<Token>, | ||
50 | } | ||
51 | |||
52 | impl<'t> ParserInput<'t> { | ||
53 | /// Generate input from tokens(expect comment and whitespace). | ||
54 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
55 | let mut tokens = Vec::new(); | ||
56 | let mut start_offsets = Vec::new(); | ||
57 | let mut len = 0.into(); | ||
58 | for &token in raw_tokens.iter() { | ||
59 | if !token.kind.is_trivia() { | ||
60 | tokens.push(token); | ||
61 | start_offsets.push(len); | ||
62 | } | ||
63 | len += token.len; | ||
64 | } | ||
65 | |||
66 | ParserInput { text, start_offsets, tokens } | ||
67 | } | ||
68 | } | ||
diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser.rs index 781c407de..923b0f2b2 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser.rs | |||
@@ -1,10 +1,13 @@ | |||
1 | use std::cell::Cell; | ||
2 | |||
1 | use drop_bomb::DropBomb; | 3 | use drop_bomb::DropBomb; |
2 | 4 | ||
3 | use crate::{ | 5 | use crate::{ |
4 | SyntaxKind::{self, ERROR}, | 6 | SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, |
5 | parsing::{ | 7 | parsing::{ |
8 | TokenSource, ParseError, | ||
6 | token_set::TokenSet, | 9 | token_set::TokenSet, |
7 | parser_impl::ParserImpl | 10 | event::Event, |
8 | }, | 11 | }, |
9 | }; | 12 | }; |
10 | 13 | ||
@@ -17,9 +20,22 @@ use crate::{ | |||
17 | /// tree, but rather a flat stream of events of the form | 20 | /// tree, but rather a flat stream of events of the form |
18 | /// "start expression, consume number literal, | 21 | /// "start expression, consume number literal, |
19 | /// finish expression". See `Event` docs for more. | 22 | /// finish expression". See `Event` docs for more. |
20 | pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); | 23 | pub(crate) struct Parser<'t> { |
24 | token_source: &'t dyn TokenSource, | ||
25 | token_pos: usize, | ||
26 | events: Vec<Event>, | ||
27 | steps: Cell<u32>, | ||
28 | } | ||
21 | 29 | ||
22 | impl<'t> Parser<'t> { | 30 | impl<'t> Parser<'t> { |
31 | pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { | ||
32 | Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } | ||
33 | } | ||
34 | |||
35 | pub(crate) fn finish(self) -> Vec<Event> { | ||
36 | self.events | ||
37 | } | ||
38 | |||
23 | /// Returns the kind of the current token. | 39 | /// Returns the kind of the current token. |
24 | /// If parser has already reached the end of input, | 40 | /// If parser has already reached the end of input, |
25 | /// the special `EOF` kind is returned. | 41 | /// the special `EOF` kind is returned. |
@@ -32,7 +48,13 @@ impl<'t> Parser<'t> { | |||
32 | /// | 48 | /// |
33 | /// Useful for parsing things like `>>`. | 49 | /// Useful for parsing things like `>>`. |
34 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | 50 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { |
35 | self.0.current2() | 51 | let c1 = self.token_source.token_kind(self.token_pos); |
52 | let c2 = self.token_source.token_kind(self.token_pos + 1); | ||
53 | if self.token_source.is_token_joint_to_next(self.token_pos) { | ||
54 | Some((c1, c2)) | ||
55 | } else { | ||
56 | None | ||
57 | } | ||
36 | } | 58 | } |
37 | 59 | ||
38 | /// Returns the kinds of the current three tokens, if they are not separated | 60 | /// Returns the kinds of the current three tokens, if they are not separated |
@@ -40,13 +62,25 @@ impl<'t> Parser<'t> { | |||
40 | /// | 62 | /// |
41 | /// Useful for parsing things like `=>>`. | 63 | /// Useful for parsing things like `=>>`. |
42 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | 64 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { |
43 | self.0.current3() | 65 | let c1 = self.token_source.token_kind(self.token_pos); |
66 | let c2 = self.token_source.token_kind(self.token_pos + 1); | ||
67 | let c3 = self.token_source.token_kind(self.token_pos + 2); | ||
68 | if self.token_source.is_token_joint_to_next(self.token_pos) | ||
69 | && self.token_source.is_token_joint_to_next(self.token_pos + 1) | ||
70 | { | ||
71 | Some((c1, c2, c3)) | ||
72 | } else { | ||
73 | None | ||
74 | } | ||
44 | } | 75 | } |
45 | 76 | ||
46 | /// Lookahead operation: returns the kind of the next nth | 77 | /// Lookahead operation: returns the kind of the next nth |
47 | /// token. | 78 | /// token. |
48 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { | 79 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { |
49 | self.0.nth(n) | 80 | let steps = self.steps.get(); |
81 | assert!(steps <= 10_000_000, "the parser seems stuck"); | ||
82 | self.steps.set(steps + 1); | ||
83 | self.token_source.token_kind(self.token_pos + n) | ||
50 | } | 84 | } |
51 | 85 | ||
52 | /// Checks if the current token is `kind`. | 86 | /// Checks if the current token is `kind`. |
@@ -60,20 +94,26 @@ impl<'t> Parser<'t> { | |||
60 | } | 94 | } |
61 | 95 | ||
62 | /// Checks if the current token is contextual keyword with text `t`. | 96 | /// Checks if the current token is contextual keyword with text `t`. |
63 | pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { | 97 | pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { |
64 | self.0.at_kw(t) | 98 | self.token_source.is_keyword(self.token_pos, kw) |
65 | } | 99 | } |
66 | 100 | ||
67 | /// Starts a new node in the syntax tree. All nodes and tokens | 101 | /// Starts a new node in the syntax tree. All nodes and tokens |
68 | /// consumed between the `start` and the corresponding `Marker::complete` | 102 | /// consumed between the `start` and the corresponding `Marker::complete` |
69 | /// belong to the same node. | 103 | /// belong to the same node. |
70 | pub(crate) fn start(&mut self) -> Marker { | 104 | pub(crate) fn start(&mut self) -> Marker { |
71 | Marker::new(self.0.start()) | 105 | let pos = self.events.len() as u32; |
106 | self.push_event(Event::tombstone()); | ||
107 | Marker::new(pos) | ||
72 | } | 108 | } |
73 | 109 | ||
74 | /// Advances the parser by one token unconditionally. | 110 | /// Advances the parser by one token unconditionally. |
75 | pub(crate) fn bump(&mut self) { | 111 | pub(crate) fn bump(&mut self) { |
76 | self.0.bump(); | 112 | let kind = self.nth(0); |
113 | if kind == EOF { | ||
114 | return; | ||
115 | } | ||
116 | self.do_bump(kind, 1); | ||
77 | } | 117 | } |
78 | 118 | ||
79 | /// Advances the parser by one token, remapping its kind. | 119 | /// Advances the parser by one token, remapping its kind. |
@@ -83,14 +123,18 @@ impl<'t> Parser<'t> { | |||
83 | /// `union` keyword, and keyword is what ends up in the | 123 | /// `union` keyword, and keyword is what ends up in the |
84 | /// final tree. | 124 | /// final tree. |
85 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | 125 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { |
86 | self.0.bump_remap(kind); | 126 | if self.nth(0) == EOF { |
127 | // TODO: panic!? | ||
128 | return; | ||
129 | } | ||
130 | self.do_bump(kind, 1); | ||
87 | } | 131 | } |
88 | 132 | ||
89 | /// Advances the parser by `n` tokens, remapping its kind. | 133 | /// Advances the parser by `n` tokens, remapping its kind. |
90 | /// This is useful to create compound tokens from parts. For | 134 | /// This is useful to create compound tokens from parts. For |
91 | /// example, an `<<` token is two consecutive remapped `<` tokens | 135 | /// example, an `<<` token is two consecutive remapped `<` tokens |
92 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | 136 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { |
93 | self.0.bump_compound(kind, n); | 137 | self.do_bump(kind, n); |
94 | } | 138 | } |
95 | 139 | ||
96 | /// Emit error with the `message` | 140 | /// Emit error with the `message` |
@@ -98,7 +142,8 @@ impl<'t> Parser<'t> { | |||
98 | /// structured errors with spans and notes, like rustc | 142 | /// structured errors with spans and notes, like rustc |
99 | /// does. | 143 | /// does. |
100 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { | 144 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { |
101 | self.0.error(message.into()) | 145 | let msg = ParseError(message.into()); |
146 | self.push_event(Event::Error { msg }) | ||
102 | } | 147 | } |
103 | 148 | ||
104 | /// Consume the next token if `kind` matches. | 149 | /// Consume the next token if `kind` matches. |
@@ -136,6 +181,15 @@ impl<'t> Parser<'t> { | |||
136 | m.complete(self, ERROR); | 181 | m.complete(self, ERROR); |
137 | }; | 182 | }; |
138 | } | 183 | } |
184 | |||
185 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | ||
186 | self.token_pos += usize::from(n_raw_tokens); | ||
187 | self.push_event(Event::Token { kind, n_raw_tokens }); | ||
188 | } | ||
189 | |||
190 | fn push_event(&mut self, event: Event) { | ||
191 | self.events.push(event) | ||
192 | } | ||
139 | } | 193 | } |
140 | 194 | ||
141 | /// See `Parser::start`. | 195 | /// See `Parser::start`. |
@@ -154,7 +208,14 @@ impl Marker { | |||
154 | /// operation like `.precede()` to deal with forward_parent. | 208 | /// operation like `.precede()` to deal with forward_parent. |
155 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { | 209 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { |
156 | self.bomb.defuse(); | 210 | self.bomb.defuse(); |
157 | p.0.complete(self.pos, kind); | 211 | let idx = self.pos as usize; |
212 | match p.events[idx] { | ||
213 | Event::Start { kind: ref mut slot, .. } => { | ||
214 | *slot = kind; | ||
215 | } | ||
216 | _ => unreachable!(), | ||
217 | } | ||
218 | p.push_event(Event::Finish); | ||
158 | CompletedMarker::new(self.pos, kind) | 219 | CompletedMarker::new(self.pos, kind) |
159 | } | 220 | } |
160 | 221 | ||
@@ -162,7 +223,13 @@ impl Marker { | |||
162 | /// are attached to its parent instead. | 223 | /// are attached to its parent instead. |
163 | pub(crate) fn abandon(mut self, p: &mut Parser) { | 224 | pub(crate) fn abandon(mut self, p: &mut Parser) { |
164 | self.bomb.defuse(); | 225 | self.bomb.defuse(); |
165 | p.0.abandon(self.pos); | 226 | let idx = self.pos as usize; |
227 | if idx == p.events.len() - 1 { | ||
228 | match p.events.pop() { | ||
229 | Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), | ||
230 | _ => unreachable!(), | ||
231 | } | ||
232 | } | ||
166 | } | 233 | } |
167 | } | 234 | } |
168 | 235 | ||
@@ -186,7 +253,15 @@ impl CompletedMarker { | |||
186 | /// then mark `NEWSTART` as `START`'s parent with saving its relative | 253 | /// then mark `NEWSTART` as `START`'s parent with saving its relative |
187 | /// distance to `NEWSTART` into forward_parent(=2 in this case); | 254 | /// distance to `NEWSTART` into forward_parent(=2 in this case); |
188 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | 255 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { |
189 | Marker::new(p.0.precede(self.0)) | 256 | let new_pos = p.start(); |
257 | let idx = self.0 as usize; | ||
258 | match p.events[idx] { | ||
259 | Event::Start { ref mut forward_parent, .. } => { | ||
260 | *forward_parent = Some(new_pos.pos - self.0); | ||
261 | } | ||
262 | _ => unreachable!(), | ||
263 | } | ||
264 | new_pos | ||
190 | } | 265 | } |
191 | 266 | ||
192 | pub(crate) fn kind(&self) -> SyntaxKind { | 267 | pub(crate) fn kind(&self) -> SyntaxKind { |
diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs deleted file mode 100644 index 8cce1ab01..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ /dev/null | |||
@@ -1,200 +0,0 @@ | |||
1 | mod event; | ||
2 | mod input; | ||
3 | |||
4 | use std::cell::Cell; | ||
5 | |||
6 | use crate::{ | ||
7 | SmolStr, | ||
8 | syntax_error::{ParseError, SyntaxError}, | ||
9 | parsing::{ | ||
10 | lexer::Token, | ||
11 | parser_api::Parser, | ||
12 | parser_impl::{ | ||
13 | event::{Event, EventProcessor}, | ||
14 | input::{InputPosition, ParserInput}, | ||
15 | }, | ||
16 | }, | ||
17 | }; | ||
18 | |||
19 | use crate::SyntaxKind::{self, EOF, TOMBSTONE}; | ||
20 | |||
21 | pub(super) trait Sink { | ||
22 | type Tree; | ||
23 | |||
24 | /// Adds new leaf to the current branch. | ||
25 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); | ||
26 | |||
27 | /// Start new branch and make it current. | ||
28 | fn start_branch(&mut self, kind: SyntaxKind); | ||
29 | |||
30 | /// Finish current branch and restore previous | ||
31 | /// branch as current. | ||
32 | fn finish_branch(&mut self); | ||
33 | |||
34 | fn error(&mut self, error: SyntaxError); | ||
35 | |||
36 | /// Complete tree building. Make sure that | ||
37 | /// `start_branch` and `finish_branch` calls | ||
38 | /// are paired! | ||
39 | fn finish(self) -> Self::Tree; | ||
40 | } | ||
41 | |||
42 | /// Parse a sequence of tokens into the representative node tree | ||
43 | pub(super) fn parse_with<S: Sink>( | ||
44 | sink: S, | ||
45 | text: &str, | ||
46 | tokens: &[Token], | ||
47 | parser: fn(&mut Parser), | ||
48 | ) -> S::Tree { | ||
49 | let mut events = { | ||
50 | let input = input::ParserInput::new(text, tokens); | ||
51 | let parser_impl = ParserImpl::new(&input); | ||
52 | let mut parser_api = Parser(parser_impl); | ||
53 | parser(&mut parser_api); | ||
54 | parser_api.0.into_events() | ||
55 | }; | ||
56 | EventProcessor::new(sink, text, tokens, &mut events).process().finish() | ||
57 | } | ||
58 | |||
59 | /// Implementation details of `Parser`, extracted | ||
60 | /// to a separate struct in order not to pollute | ||
61 | /// the public API of the `Parser`. | ||
62 | pub(super) struct ParserImpl<'t> { | ||
63 | parser_input: &'t ParserInput<'t>, | ||
64 | pos: InputPosition, | ||
65 | events: Vec<Event>, | ||
66 | steps: Cell<u32>, | ||
67 | } | ||
68 | |||
69 | impl<'t> ParserImpl<'t> { | ||
70 | fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { | ||
71 | ParserImpl { | ||
72 | parser_input: inp, | ||
73 | pos: InputPosition::new(), | ||
74 | events: Vec::new(), | ||
75 | steps: Cell::new(0), | ||
76 | } | ||
77 | } | ||
78 | |||
79 | fn into_events(self) -> Vec<Event> { | ||
80 | assert_eq!(self.nth(0), EOF); | ||
81 | self.events | ||
82 | } | ||
83 | |||
84 | pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | ||
85 | let c1 = self.parser_input.kind(self.pos); | ||
86 | let c2 = self.parser_input.kind(self.pos + 1); | ||
87 | if self.parser_input.token_start_at(self.pos + 1) | ||
88 | == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) | ||
89 | { | ||
90 | Some((c1, c2)) | ||
91 | } else { | ||
92 | None | ||
93 | } | ||
94 | } | ||
95 | |||
96 | pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | ||
97 | let c1 = self.parser_input.kind(self.pos); | ||
98 | let c2 = self.parser_input.kind(self.pos + 1); | ||
99 | let c3 = self.parser_input.kind(self.pos + 2); | ||
100 | if self.parser_input.token_start_at(self.pos + 1) | ||
101 | == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) | ||
102 | && self.parser_input.token_start_at(self.pos + 2) | ||
103 | == self.parser_input.token_start_at(self.pos + 1) | ||
104 | + self.parser_input.token_len(self.pos + 1) | ||
105 | { | ||
106 | Some((c1, c2, c3)) | ||
107 | } else { | ||
108 | None | ||
109 | } | ||
110 | } | ||
111 | |||
112 | /// Get the syntax kind of the nth token. | ||
113 | pub(super) fn nth(&self, n: u32) -> SyntaxKind { | ||
114 | let steps = self.steps.get(); | ||
115 | assert!(steps <= 10_000_000, "the parser seems stuck"); | ||
116 | self.steps.set(steps + 1); | ||
117 | |||
118 | self.parser_input.kind(self.pos + n) | ||
119 | } | ||
120 | |||
121 | pub(super) fn at_kw(&self, t: &str) -> bool { | ||
122 | self.parser_input.token_text(self.pos) == t | ||
123 | } | ||
124 | |||
125 | /// Start parsing right behind the last event. | ||
126 | pub(super) fn start(&mut self) -> u32 { | ||
127 | let pos = self.events.len() as u32; | ||
128 | self.push_event(Event::tombstone()); | ||
129 | pos | ||
130 | } | ||
131 | |||
132 | /// Advances the parser by one token unconditionally. | ||
133 | pub(super) fn bump(&mut self) { | ||
134 | let kind = self.nth(0); | ||
135 | if kind == EOF { | ||
136 | return; | ||
137 | } | ||
138 | self.do_bump(kind, 1); | ||
139 | } | ||
140 | |||
141 | pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
142 | if self.nth(0) == EOF { | ||
143 | // TODO: panic!? | ||
144 | return; | ||
145 | } | ||
146 | self.do_bump(kind, 1); | ||
147 | } | ||
148 | |||
149 | pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | ||
150 | self.do_bump(kind, n); | ||
151 | } | ||
152 | |||
153 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | ||
154 | self.pos += u32::from(n_raw_tokens); | ||
155 | self.push_event(Event::Token { kind, n_raw_tokens }); | ||
156 | } | ||
157 | |||
158 | /// Append one Error event to the back of events. | ||
159 | pub(super) fn error(&mut self, msg: String) { | ||
160 | self.push_event(Event::Error { msg: ParseError(msg) }) | ||
161 | } | ||
162 | |||
163 | /// Complete an event with appending a `Finish` event. | ||
164 | pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { | ||
165 | match self.events[pos as usize] { | ||
166 | Event::Start { kind: ref mut slot, .. } => { | ||
167 | *slot = kind; | ||
168 | } | ||
169 | _ => unreachable!(), | ||
170 | } | ||
171 | self.push_event(Event::Finish); | ||
172 | } | ||
173 | |||
174 | /// Ignore the dummy `Start` event. | ||
175 | pub(super) fn abandon(&mut self, pos: u32) { | ||
176 | let idx = pos as usize; | ||
177 | if idx == self.events.len() - 1 { | ||
178 | match self.events.pop() { | ||
179 | Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), | ||
180 | _ => unreachable!(), | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /// Save the relative distance of a completed event to its forward_parent. | ||
186 | pub(super) fn precede(&mut self, pos: u32) -> u32 { | ||
187 | let new_pos = self.start(); | ||
188 | match self.events[pos as usize] { | ||
189 | Event::Start { ref mut forward_parent, .. } => { | ||
190 | *forward_parent = Some(new_pos - pos); | ||
191 | } | ||
192 | _ => unreachable!(), | ||
193 | } | ||
194 | new_pos | ||
195 | } | ||
196 | |||
197 | fn push_event(&mut self, event: Event) { | ||
198 | self.events.push(event) | ||
199 | } | ||
200 | } | ||
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs deleted file mode 100644 index 275d94918..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ /dev/null | |||
@@ -1,104 +0,0 @@ | |||
1 | use crate::{ | ||
2 | SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, | ||
3 | parsing::lexer::Token, | ||
4 | }; | ||
5 | |||
6 | use std::ops::{Add, AddAssign}; | ||
7 | |||
8 | pub(crate) struct ParserInput<'t> { | ||
9 | text: &'t str, | ||
10 | /// start position of each token(expect whitespace and comment) | ||
11 | /// ```non-rust | ||
12 | /// struct Foo; | ||
13 | /// ^------^--- | ||
14 | /// | | ^- | ||
15 | /// 0 7 10 | ||
16 | /// ``` | ||
17 | /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` | ||
18 | start_offsets: Vec<TextUnit>, | ||
19 | /// non-whitespace/comment tokens | ||
20 | /// ```non-rust | ||
21 | /// struct Foo {} | ||
22 | /// ^^^^^^ ^^^ ^^ | ||
23 | /// ``` | ||
24 | /// tokens: `[struct, Foo, {, }]` | ||
25 | tokens: Vec<Token>, | ||
26 | } | ||
27 | |||
28 | impl<'t> ParserInput<'t> { | ||
29 | /// Generate input from tokens(expect comment and whitespace). | ||
30 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
31 | let mut tokens = Vec::new(); | ||
32 | let mut start_offsets = Vec::new(); | ||
33 | let mut len = 0.into(); | ||
34 | for &token in raw_tokens.iter() { | ||
35 | if !token.kind.is_trivia() { | ||
36 | tokens.push(token); | ||
37 | start_offsets.push(len); | ||
38 | } | ||
39 | len += token.len; | ||
40 | } | ||
41 | |||
42 | ParserInput { text, start_offsets, tokens } | ||
43 | } | ||
44 | |||
45 | /// Get the syntax kind of token at given input position. | ||
46 | pub fn kind(&self, pos: InputPosition) -> SyntaxKind { | ||
47 | let idx = pos.0 as usize; | ||
48 | if !(idx < self.tokens.len()) { | ||
49 | return EOF; | ||
50 | } | ||
51 | self.tokens[idx].kind | ||
52 | } | ||
53 | |||
54 | /// Get the length of a token at given input position. | ||
55 | pub fn token_len(&self, pos: InputPosition) -> TextUnit { | ||
56 | let idx = pos.0 as usize; | ||
57 | if !(idx < self.tokens.len()) { | ||
58 | return 0.into(); | ||
59 | } | ||
60 | self.tokens[idx].len | ||
61 | } | ||
62 | |||
63 | /// Get the start position of a taken at given input position. | ||
64 | pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { | ||
65 | let idx = pos.0 as usize; | ||
66 | if !(idx < self.tokens.len()) { | ||
67 | return 0.into(); | ||
68 | } | ||
69 | self.start_offsets[idx] | ||
70 | } | ||
71 | |||
72 | /// Get the raw text of a token at given input position. | ||
73 | pub fn token_text(&self, pos: InputPosition) -> &'t str { | ||
74 | let idx = pos.0 as usize; | ||
75 | if !(idx < self.tokens.len()) { | ||
76 | return ""; | ||
77 | } | ||
78 | let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); | ||
79 | &self.text[range] | ||
80 | } | ||
81 | } | ||
82 | |||
83 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] | ||
84 | pub(crate) struct InputPosition(u32); | ||
85 | |||
86 | impl InputPosition { | ||
87 | pub fn new() -> Self { | ||
88 | InputPosition(0) | ||
89 | } | ||
90 | } | ||
91 | |||
92 | impl Add<u32> for InputPosition { | ||
93 | type Output = InputPosition; | ||
94 | |||
95 | fn add(self, rhs: u32) -> InputPosition { | ||
96 | InputPosition(self.0 + rhs) | ||
97 | } | ||
98 | } | ||
99 | |||
100 | impl AddAssign<u32> for InputPosition { | ||
101 | fn add_assign(&mut self, rhs: u32) { | ||
102 | self.0 += rhs | ||
103 | } | ||
104 | } | ||
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index edf3fa291..f2d218ab9 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs | |||
@@ -4,10 +4,9 @@ use crate::{ | |||
4 | syntax_node::{GreenNode, SyntaxNode}, | 4 | syntax_node::{GreenNode, SyntaxNode}, |
5 | syntax_error::SyntaxError, | 5 | syntax_error::SyntaxError, |
6 | parsing::{ | 6 | parsing::{ |
7 | grammar, | 7 | grammar, parse_with, |
8 | parser_impl, | ||
9 | builder::GreenBuilder, | 8 | builder::GreenBuilder, |
10 | parser_api::Parser, | 9 | parser::Parser, |
11 | lexer::{tokenize, Token}, | 10 | lexer::{tokenize, Token}, |
12 | } | 11 | } |
13 | }; | 12 | }; |
@@ -62,8 +61,7 @@ fn reparse_block<'node>( | |||
62 | if !is_balanced(&tokens) { | 61 | if !is_balanced(&tokens) { |
63 | return None; | 62 | return None; |
64 | } | 63 | } |
65 | let (green, new_errors) = | 64 | let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); |
66 | parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser); | ||
67 | Some((node, green, new_errors)) | 65 | Some((node, green, new_errors)) |
68 | } | 66 | } |
69 | 67 | ||
diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 4ff998090..1a00fcc27 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs | |||
@@ -1,6 +1,6 @@ | |||
1 | use std::fmt; | 1 | use std::fmt; |
2 | 2 | ||
3 | use crate::{TextRange, TextUnit}; | 3 | use crate::{TextRange, TextUnit, parsing::ParseError}; |
4 | 4 | ||
5 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | 5 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
6 | pub struct SyntaxError { | 6 | pub struct SyntaxError { |
@@ -95,9 +95,6 @@ pub enum SyntaxErrorKind { | |||
95 | InvalidMatchInnerAttr, | 95 | InvalidMatchInnerAttr, |
96 | } | 96 | } |
97 | 97 | ||
98 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
99 | pub struct ParseError(pub String); | ||
100 | |||
101 | impl fmt::Display for SyntaxErrorKind { | 98 | impl fmt::Display for SyntaxErrorKind { |
102 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | 99 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
103 | use self::SyntaxErrorKind::*; | 100 | use self::SyntaxErrorKind::*; |