| author    | Aleksey Kladov <[email protected]> | 2019-02-20 18:50:07 +0000 |
|-----------|----------------------------------|---------------------------|
| committer | Aleksey Kladov <[email protected]> | 2019-02-20 18:50:07 +0000 |
| commit    | 0c81b9deeed81bfb2cf8142af9d748317d5d71a1 (patch) | |
| tree      | e9c0a1affabfa444611b762dc721d426e3a5bb56 /crates/ra_syntax | |
| parent    | 3517c175ac537b47dd3e36cc7fb1edd60b02c039 (diff) | |
route parsing via TokenSource trait
Diffstat (limited to 'crates/ra_syntax')
-rw-r--r--   crates/ra_syntax/src/parsing/parser_api.rs        |  4
-rw-r--r--   crates/ra_syntax/src/parsing/parser_impl.rs       | 50
-rw-r--r--   crates/ra_syntax/src/parsing/parser_impl/input.rs | 69
3 files changed, 59 insertions(+), 64 deletions(-)
diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs
index 781c407de..813ae494c 100644
--- a/crates/ra_syntax/src/parsing/parser_api.rs
+++ b/crates/ra_syntax/src/parsing/parser_api.rs
@@ -17,7 +17,9 @@ use crate::{
 /// tree, but rather a flat stream of events of the form
 /// "start expression, consume number literal,
 /// finish expression". See `Event` docs for more.
-pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
+pub(crate) struct Parser<'t>(
+    pub(super) ParserImpl<crate::parsing::parser_impl::input::ParserInput<'t>>,
+);
 
 impl<'t> Parser<'t> {
     /// Returns the kind of the current token.
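The `parser_api.rs` change keeps the public `Parser<'t>` facade as callers know it, while the inner `ParserImpl` becomes generic over its token source and is pinned to the concrete `ParserInput<'t>` inside the wrapper. Below is a minimal sketch of that shape, using simplified stand-in types (plain `usize` positions and `u16` kinds instead of the crate's `InputPosition` and `SyntaxKind`); it illustrates the pattern and is not the actual rust-analyzer code.

```rust
// Simplified stand-in for the TokenSource abstraction introduced by this commit.
trait TokenSource {
    fn token_kind(&self, pos: usize) -> u16;
}

// Concrete, text-backed source playing the role of `ParserInput<'t>`.
struct TextSource<'t> {
    text: &'t str,
    kinds: Vec<u16>,
}

impl<'t> TokenSource for TextSource<'t> {
    fn token_kind(&self, pos: usize) -> u16 {
        // 0 plays the role of EOF in this sketch.
        self.kinds.get(pos).copied().unwrap_or(0)
    }
}

// Generic core, analogous to `ParserImpl<S>` after this commit.
struct ParserImpl<S> {
    token_source: S,
    pos: usize,
}

// Public facade, analogous to `Parser<'t>`: still lifetime-parameterized,
// but it fixes the inner generic to the concrete text-backed source.
struct Parser<'t>(ParserImpl<TextSource<'t>>);

impl<'t> Parser<'t> {
    fn current(&self) -> u16 {
        self.0.token_source.token_kind(self.0.pos)
    }
}

fn main() {
    let src = TextSource { text: "fn", kinds: vec![7] };
    let p = Parser(ParserImpl { token_source: src, pos: 0 });
    println!("text {:?}, current kind {}", p.0.token_source.text, p.current());
    assert_eq!(p.current(), 7);
}
```

Keeping the generic parameter on `ParserImpl` and fixing it at the facade means downstream grammar code continues to see a single, non-generic `Parser` type.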
diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs
index 02baed76b..c0d2b6ec1 100644
--- a/crates/ra_syntax/src/parsing/parser_impl.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl.rs
@@ -1,5 +1,5 @@
 mod event;
-mod input;
+pub(crate) mod input;
 
 use std::cell::Cell;
 
@@ -11,7 +11,7 @@ use crate::{
         parser_api::Parser,
         parser_impl::{
             event::{Event, EventProcessor},
-            input::{InputPosition, ParserInput},
+            input::InputPosition,
         },
     },
 };
@@ -39,6 +39,12 @@ pub(super) trait TreeSink {
     fn finish(self) -> Self::Tree;
 }
 
+pub(super) trait TokenSource {
+    fn token_kind(&self, pos: InputPosition) -> SyntaxKind;
+    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool;
+    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool;
+}
+
 /// Parse a sequence of tokens into the representative node tree
 pub(super) fn parse_with<S: TreeSink>(
     sink: S,
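The new `TokenSource` trait reduces what the parser needs from its input to three queries: the `SyntaxKind` at a position, whether that token touches the next one, and whether it spells a given keyword. Nothing in those signatures mentions source text, so a source that is not backed by a string becomes possible. Purely as a hypothetical illustration (not part of this commit), a prepared, in-memory source might look like the sketch below, again with simplified stand-in types.

```rust
// Hypothetical TokenSource over pre-built tokens; `usize` replaces
// InputPosition and a &'static str label replaces SyntaxKind for brevity.
trait TokenSource {
    fn token_kind(&self, pos: usize) -> &'static str;
    fn is_token_joint_to_next(&self, pos: usize) -> bool;
    fn is_keyword(&self, pos: usize, kw: &str) -> bool;
}

struct PreparedTokens {
    // (kind, keyword text if any, joint-to-next flag)
    tokens: Vec<(&'static str, Option<&'static str>, bool)>,
}

impl TokenSource for PreparedTokens {
    fn token_kind(&self, pos: usize) -> &'static str {
        self.tokens.get(pos).map(|t| t.0).unwrap_or("EOF")
    }
    fn is_token_joint_to_next(&self, pos: usize) -> bool {
        self.tokens.get(pos).map(|t| t.2).unwrap_or(true)
    }
    fn is_keyword(&self, pos: usize, kw: &str) -> bool {
        self.tokens
            .get(pos)
            .and_then(|t| t.1)
            .map_or(false, |text| text == kw)
    }
}

fn main() {
    // "union Foo" as two prepared tokens: a contextual keyword, then an identifier.
    let src = PreparedTokens {
        tokens: vec![("IDENT", Some("union"), false), ("IDENT", None, true)],
    };
    assert!(src.is_keyword(0, "union"));
    assert!(!src.is_token_joint_to_next(0));
    assert_eq!(src.token_kind(2), "EOF");
}
```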
@@ -48,7 +54,7 @@ pub(super) fn parse_with<S: TreeSink>(
 ) -> S::Tree {
     let mut events = {
         let input = input::ParserInput::new(text, tokens);
-        let parser_impl = ParserImpl::new(&input);
+        let parser_impl = ParserImpl::new(input);
         let mut parser_api = Parser(parser_impl);
         parser(&mut parser_api);
         parser_api.0.into_events()
@@ -59,17 +65,17 @@
 /// Implementation details of `Parser`, extracted
 /// to a separate struct in order not to pollute
 /// the public API of the `Parser`.
-pub(super) struct ParserImpl<'t> {
-    parser_input: &'t ParserInput<'t>,
+pub(super) struct ParserImpl<S> {
+    token_source: S,
     pos: InputPosition,
     events: Vec<Event>,
     steps: Cell<u32>,
 }
 
-impl<'t> ParserImpl<'t> {
-    fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
+impl<S: TokenSource> ParserImpl<S> {
+    fn new(token_source: S) -> ParserImpl<S> {
         ParserImpl {
-            parser_input: inp,
+            token_source,
             pos: InputPosition::new(),
             events: Vec::new(),
             steps: Cell::new(0),
@@ -82,11 +88,9 @@ impl<'t> ParserImpl<'t> {
     }
 
     pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
-        let c1 = self.parser_input.kind(self.pos);
-        let c2 = self.parser_input.kind(self.pos + 1);
-        if self.parser_input.token_start_at(self.pos + 1)
-            == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
-        {
+        let c1 = self.token_source.token_kind(self.pos);
+        let c2 = self.token_source.token_kind(self.pos + 1);
+        if self.token_source.is_token_joint_to_next(self.pos) {
             Some((c1, c2))
         } else {
             None
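Before this change, `current2` decided whether two neighbouring tokens could be reported as a pair by comparing raw offsets: the next token has to start exactly where the current one ends. `is_token_joint_to_next` gives that check a name and moves the arithmetic into the token source. A small runnable sketch of the same arithmetic, with plain integers standing in for `TextUnit`:

```rust
// The offset check that is_token_joint_to_next encapsulates:
// starts[i] is the start offset of token i, lens[i] its length.
fn is_joint_to_next(starts: &[u32], lens: &[u32], i: usize) -> bool {
    starts[i] + lens[i] == starts[i + 1]
}

fn main() {
    // "foo::bar" lexes roughly as IDENT(3) ':'(1) ':'(1) IDENT(3).
    let starts = [0, 3, 4, 5];
    let lens = [3, 1, 1, 3];
    // The two ':' tokens touch, so current2 would report them as a pair.
    assert!(is_joint_to_next(&starts, &lens, 1));

    // "foo : : bar" puts whitespace between them, so they are not joint.
    let starts = [0, 4, 6, 8];
    let lens = [3, 1, 1, 3];
    assert!(!is_joint_to_next(&starts, &lens, 1));
}
```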
@@ -94,14 +98,11 @@ impl<'t> ParserImpl<'t> {
     }
 
     pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
-        let c1 = self.parser_input.kind(self.pos);
-        let c2 = self.parser_input.kind(self.pos + 1);
-        let c3 = self.parser_input.kind(self.pos + 2);
-        if self.parser_input.token_start_at(self.pos + 1)
-            == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos)
-            && self.parser_input.token_start_at(self.pos + 2)
-                == self.parser_input.token_start_at(self.pos + 1)
-                    + self.parser_input.token_len(self.pos + 1)
+        let c1 = self.token_source.token_kind(self.pos);
+        let c2 = self.token_source.token_kind(self.pos + 1);
+        let c3 = self.token_source.token_kind(self.pos + 2);
+        if self.token_source.is_token_joint_to_next(self.pos)
+            && self.token_source.is_token_joint_to_next(self.pos + 1)
         {
             Some((c1, c2, c3))
         } else {
@@ -114,12 +115,11 @@ impl<'t> ParserImpl<'t> {
         let steps = self.steps.get();
         assert!(steps <= 10_000_000, "the parser seems stuck");
         self.steps.set(steps + 1);
-
-        self.parser_input.kind(self.pos + n)
+        self.token_source.token_kind(self.pos + n)
     }
 
-    pub(super) fn at_kw(&self, t: &str) -> bool {
-        self.parser_input.token_text(self.pos) == t
+    pub(super) fn at_kw(&self, kw: &str) -> bool {
+        self.token_source.is_keyword(self.pos, kw)
     }
 
     /// Start parsing right behind the last event.
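Besides routing `at_kw` through `is_keyword`, the unchanged context lines above show the parser's anti-hang guard: every raw lookahead bumps a counter stored in a `Cell` (so a `&self` method can still mutate it) and asserts that the parser has not looped forever. A minimal sketch of that pattern with hypothetical names (the real method's signature lies outside this hunk):

```rust
use std::cell::Cell;

// Fuel-style guard: each raw token query increments `steps` through a Cell,
// and a stuck parser panics instead of hanging.
struct Lookahead {
    kinds: Vec<&'static str>,
    pos: usize,
    steps: Cell<u32>,
}

impl Lookahead {
    fn nth(&self, n: usize) -> &'static str {
        let steps = self.steps.get();
        assert!(steps <= 10_000_000, "the parser seems stuck");
        self.steps.set(steps + 1);
        self.kinds.get(self.pos + n).copied().unwrap_or("EOF")
    }
}

fn main() {
    let la = Lookahead { kinds: vec!["FN_KW", "IDENT"], pos: 0, steps: Cell::new(0) };
    assert_eq!(la.nth(1), "IDENT");
    assert_eq!(la.nth(5), "EOF");
    assert_eq!(la.steps.get(), 2);
}
```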
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
index 275d94918..8ebbd3825 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -1,10 +1,40 @@
 use crate::{
     SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
-    parsing::lexer::Token,
+    parsing::{
+        parser_impl::TokenSource,
+        lexer::Token,
+    },
 };
 
 use std::ops::{Add, AddAssign};
 
+impl<'t> TokenSource for ParserInput<'t> {
+    fn token_kind(&self, pos: InputPosition) -> SyntaxKind {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return EOF;
+        }
+        self.tokens[idx].kind
+    }
+    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool {
+        let idx_curr = pos.0 as usize;
+        let idx_next = pos.0 as usize + 1;
+        if !(idx_next < self.tokens.len()) {
+            return true;
+        }
+        self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
+    }
+    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return false;
+        }
+        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
+
+        self.text[range] == *kw
+    }
+}
+
 pub(crate) struct ParserInput<'t> {
     text: &'t str,
     /// start position of each token(expect whitespace and comment)
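`ParserInput` keeps the same three pieces of state (the text, the per-token start offsets, and the lexed tokens); this hunk only moves the accessors behind the trait. The following self-contained model answers the same three queries from those fields, with `usize` and `u32` standing in for `InputPosition`, `TextUnit`, and `TextRange`; it mirrors the logic above rather than reproducing the real types.

```rust
// Simplified stand-in for the lexer's Token: just a kind label and a length.
struct Token {
    kind: &'static str,
    len: u32,
}

struct Input<'t> {
    text: &'t str,
    start_offsets: Vec<u32>,
    tokens: Vec<Token>,
}

impl<'t> Input<'t> {
    fn token_kind(&self, pos: usize) -> &'static str {
        if pos >= self.tokens.len() {
            return "EOF";
        }
        self.tokens[pos].kind
    }

    fn is_token_joint_to_next(&self, pos: usize) -> bool {
        let next = pos + 1;
        if next >= self.tokens.len() {
            return true; // as in the diff: past the end counts as joint
        }
        self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[next]
    }

    fn is_keyword(&self, pos: usize, kw: &str) -> bool {
        if pos >= self.tokens.len() {
            return false;
        }
        let start = self.start_offsets[pos] as usize;
        let end = start + self.tokens[pos].len as usize;
        &self.text[start..end] == kw
    }
}

fn main() {
    // "union T {}" lexed without whitespace tokens; `union` is a contextual
    // keyword, so it lexes as an identifier and is recognised by its text.
    let input = Input {
        text: "union T {}",
        start_offsets: vec![0, 6, 8, 9],
        tokens: vec![
            Token { kind: "IDENT", len: 5 },
            Token { kind: "IDENT", len: 1 },
            Token { kind: "L_CURLY", len: 1 },
            Token { kind: "R_CURLY", len: 1 },
        ],
    };
    assert!(input.is_keyword(0, "union"));
    assert!(input.is_token_joint_to_next(2)); // "{" touches "}"
    assert!(!input.is_token_joint_to_next(0)); // "union" and "T" are separated
    assert_eq!(input.token_kind(10), "EOF");
}
```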
@@ -41,43 +71,6 @@ impl<'t> ParserInput<'t> {
 
         ParserInput { text, start_offsets, tokens }
     }
-
-    /// Get the syntax kind of token at given input position.
-    pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return EOF;
-        }
-        self.tokens[idx].kind
-    }
-
-    /// Get the length of a token at given input position.
-    pub fn token_len(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.tokens[idx].len
-    }
-
-    /// Get the start position of a taken at given input position.
-    pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.start_offsets[idx]
-    }
-
-    /// Get the raw text of a token at given input position.
-    pub fn token_text(&self, pos: InputPosition) -> &'t str {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return "";
-        }
-        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
-        &self.text[range]
-    }
 }
 
 #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
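The derive line in the trailing context belongs to `InputPosition`, whose definition sits outside this diff. From its use above (`pos.0 as usize`, `self.pos + n`, and the `Add`/`AddAssign` import), it appears to be a small copyable index newtype with integer arithmetic. A hedged sketch of that assumed shape, taking the `u32` payload as an assumption:

```rust
use std::ops::{Add, AddAssign};

// Assumed shape of InputPosition: only its derives and its usage
// (`pos.0 as usize`, `pos + 1`) are visible in this diff.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
struct InputPosition(u32);

impl Add<u32> for InputPosition {
    type Output = InputPosition;
    fn add(self, rhs: u32) -> InputPosition {
        InputPosition(self.0 + rhs)
    }
}

impl AddAssign<u32> for InputPosition {
    fn add_assign(&mut self, rhs: u32) {
        self.0 += rhs;
    }
}

fn main() {
    let mut pos = InputPosition(0);
    pos += 1;
    let next = pos + 1;
    assert_eq!(next.0 as usize, 2);
    assert!(next > pos); // the derived Ord makes positions comparable
}
```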