diff options
-rw-r--r-- | crates/ra_syntax/src/parsing/text_token_source.rs | 89 |
1 files changed, 43 insertions, 46 deletions
diff --git a/crates/ra_syntax/src/parsing/text_token_source.rs b/crates/ra_syntax/src/parsing/text_token_source.rs index 7ddc2c2c3..97aa3e795 100644 --- a/crates/ra_syntax/src/parsing/text_token_source.rs +++ b/crates/ra_syntax/src/parsing/text_token_source.rs | |||
@@ -1,40 +1,35 @@ | |||
1 | //! FIXME: write short doc here | 1 | //! See `TextTokenSource` docs. |
2 | 2 | ||
3 | use ra_parser::Token as PToken; | ||
4 | use ra_parser::TokenSource; | 3 | use ra_parser::TokenSource; |
5 | 4 | ||
6 | use crate::{parsing::lexer::Token, SyntaxKind::EOF, TextRange, TextSize}; | 5 | use crate::{parsing::lexer::Token, SyntaxKind::EOF, TextRange, TextSize}; |
7 | 6 | ||
7 | /// Implementation of `ra_parser::TokenSource` that takes tokens from source code text. | ||
8 | pub(crate) struct TextTokenSource<'t> { | 8 | pub(crate) struct TextTokenSource<'t> { |
9 | text: &'t str, | 9 | text: &'t str, |
10 | /// start position of each token(expect whitespace and comment) | 10 | /// token and its start position (non-whitespace/comment tokens) |
11 | /// ```non-rust | 11 | /// ```non-rust |
12 | /// struct Foo; | 12 | /// struct Foo; |
13 | /// ^------^--- | 13 | /// ^------^--^- |
14 | /// | | ^- | 14 | /// | | \________ |
15 | /// 0 7 10 | 15 | /// | \____ \ |
16 | /// | \ | | ||
17 | /// (struct, 0) (Foo, 7) (;, 10) | ||
16 | /// ``` | 18 | /// ``` |
17 | /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` | 19 | /// `[(struct, 0), (Foo, 7), (;, 10)]` |
18 | start_offsets: Vec<TextSize>, | 20 | token_offset_pairs: Vec<(Token, TextSize)>, |
19 | /// non-whitespace/comment tokens | ||
20 | /// ```non-rust | ||
21 | /// struct Foo {} | ||
22 | /// ^^^^^^ ^^^ ^^ | ||
23 | /// ``` | ||
24 | /// tokens: `[struct, Foo, {, }]` | ||
25 | tokens: Vec<Token>, | ||
26 | 21 | ||
27 | /// Current token and position | 22 | /// Current token and position |
28 | curr: (PToken, usize), | 23 | curr: (ra_parser::Token, usize), |
29 | } | 24 | } |
30 | 25 | ||
31 | impl<'t> TokenSource for TextTokenSource<'t> { | 26 | impl<'t> TokenSource for TextTokenSource<'t> { |
32 | fn current(&self) -> PToken { | 27 | fn current(&self) -> ra_parser::Token { |
33 | self.curr.0 | 28 | self.curr.0 |
34 | } | 29 | } |
35 | 30 | ||
36 | fn lookahead_nth(&self, n: usize) -> PToken { | 31 | fn lookahead_nth(&self, n: usize) -> ra_parser::Token { |
37 | mk_token(self.curr.1 + n, &self.start_offsets, &self.tokens) | 32 | mk_token(self.curr.1 + n, &self.token_offset_pairs) |
38 | } | 33 | } |
39 | 34 | ||
40 | fn bump(&mut self) { | 35 | fn bump(&mut self) { |
@@ -43,45 +38,47 @@ impl<'t> TokenSource for TextTokenSource<'t> { | |||
43 | } | 38 | } |
44 | 39 | ||
45 | let pos = self.curr.1 + 1; | 40 | let pos = self.curr.1 + 1; |
46 | self.curr = (mk_token(pos, &self.start_offsets, &self.tokens), pos); | 41 | self.curr = (mk_token(pos, &self.token_offset_pairs), pos); |
47 | } | 42 | } |
48 | 43 | ||
49 | fn is_keyword(&self, kw: &str) -> bool { | 44 | fn is_keyword(&self, kw: &str) -> bool { |
50 | let pos = self.curr.1; | 45 | self.token_offset_pairs |
51 | if pos >= self.tokens.len() { | 46 | .get(self.curr.1) |
52 | return false; | 47 | .map(|(token, offset)| &self.text[TextRange::at(*offset, token.len)] == kw) |
53 | } | 48 | .unwrap_or(false) |
54 | let range = TextRange::at(self.start_offsets[pos], self.tokens[pos].len); | ||
55 | self.text[range] == *kw | ||
56 | } | 49 | } |
57 | } | 50 | } |
58 | 51 | ||
59 | fn mk_token(pos: usize, start_offsets: &[TextSize], tokens: &[Token]) -> PToken { | 52 | fn mk_token(pos: usize, token_offset_pairs: &[(Token, TextSize)]) -> ra_parser::Token { |
60 | let kind = tokens.get(pos).map(|t| t.kind).unwrap_or(EOF); | 53 | let (kind, is_jointed_to_next) = match token_offset_pairs.get(pos) { |
61 | let is_jointed_to_next = if pos + 1 < start_offsets.len() { | 54 | Some((token, offset)) => ( |
62 | start_offsets[pos] + tokens[pos].len == start_offsets[pos + 1] | 55 | token.kind, |
63 | } else { | 56 | token_offset_pairs |
64 | false | 57 | .get(pos + 1) |
58 | .map(|(_, next_offset)| offset + token.len == *next_offset) | ||
59 | .unwrap_or(false), | ||
60 | ), | ||
61 | None => (EOF, false), | ||
65 | }; | 62 | }; |
66 | 63 | ra_parser::Token { kind, is_jointed_to_next } | |
67 | PToken { kind, is_jointed_to_next } | ||
68 | } | 64 | } |
69 | 65 | ||
70 | impl<'t> TextTokenSource<'t> { | 66 | impl<'t> TextTokenSource<'t> { |
71 | /// Generate input from tokens(expect comment and whitespace). | 67 | /// Generate input from tokens(expect comment and whitespace). |
72 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> TextTokenSource<'t> { | 68 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> TextTokenSource<'t> { |
73 | let mut tokens = Vec::new(); | 69 | let token_offset_pairs: Vec<_> = raw_tokens |
74 | let mut start_offsets = Vec::new(); | 70 | .iter() |
75 | let mut len = 0.into(); | 71 | .filter_map({ |
76 | for &token in raw_tokens.iter() { | 72 | let mut len = 0.into(); |
77 | if !token.kind.is_trivia() { | 73 | move |token| { |
78 | tokens.push(token); | 74 | let pair = if token.kind.is_trivia() { None } else { Some((*token, len)) }; |
79 | start_offsets.push(len); | 75 | len += token.len; |
80 | } | 76 | pair |
81 | len += token.len; | 77 | } |
82 | } | 78 | }) |
79 | .collect(); | ||
83 | 80 | ||
84 | let first = mk_token(0, &start_offsets, &tokens); | 81 | let first = mk_token(0, &token_offset_pairs); |
85 | TextTokenSource { text, start_offsets, tokens, curr: (first, 0) } | 82 | TextTokenSource { text, token_offset_pairs, curr: (first, 0) } |
86 | } | 83 | } |
87 | } | 84 | } |