Diffstat (limited to 'src/parser_impl')
-rw-r--r-- | src/parser_impl/event.rs | 154
-rw-r--r-- | src/parser_impl/input.rs |  86
-rw-r--r-- | src/parser_impl/mod.rs   | 170
3 files changed, 0 insertions, 410 deletions
diff --git a/src/parser_impl/event.rs b/src/parser_impl/event.rs
deleted file mode 100644
index 9fd56b996..000000000
--- a/src/parser_impl/event.rs
+++ /dev/null
@@ -1,154 +0,0 @@
-//! This module provides a way to construct a `File`.
-//! It is intended to be completely decoupled from the
-//! parser, so as to allow to evolve the tree representation
-//! and the parser algorithm independently.
-//!
-//! The `Sink` trait is the bridge between the parser and the
-//! tree builder: the parser produces a stream of events like
-//! `start node`, `finish node`, and `FileBuilder` converts
-//! this stream to a real tree.
-use std::mem;
-use {
-    lexer::Token,
-    parser_impl::Sink,
-    SyntaxKind::{self, TOMBSTONE},
-};
-
-
-/// `Parser` produces a flat list of `Event`s.
-/// They are converted to a tree-structure in
-/// a separate pass, via `TreeBuilder`.
-#[derive(Debug)]
-pub(crate) enum Event {
-    /// This event signifies the start of the node.
-    /// It should be either abandoned (in which case the
-    /// `kind` is `TOMBSTONE`, and the event is ignored),
-    /// or completed via a `Finish` event.
-    ///
-    /// All tokens between a `Start` and a `Finish` would
-    /// become the children of the respective node.
-    ///
-    /// For left-recursive syntactic constructs, the parser produces
-    /// a child node before it sees a parent. `forward_parent`
-    /// exists to allow to tweak parent-child relationships.
-    ///
-    /// Consider this path
-    ///
-    /// foo::bar
-    ///
-    /// The events for it would look like this:
-    ///
-    ///
-    /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
-    ///       |                          /\
-    ///       |                          |
-    ///       +------forward-parent------+
-    ///
-    /// And the tree would look like this
-    ///
-    ///    +--PATH---------+
-    ///    |   |           |
-    ///    |   |           |
-    ///    |  '::'       'bar'
-    ///    |
-    ///   PATH
-    ///    |
-    ///   'foo'
-    ///
-    /// See also `CompletedMarker::precede`.
-    Start {
-        kind: SyntaxKind,
-        forward_parent: Option<u32>,
-    },
-
-    /// Complete the previous `Start` event
-    Finish,
-
-    /// Produce a single leaf-element.
-    /// `n_raw_tokens` is used to glue complex contextual tokens.
-    /// For example, lexer tokenizes `>>` as `>`, `>`, and
-    /// `n_raw_tokens = 2` is used to produced a single `>>`.
-    Token {
-        kind: SyntaxKind,
-        n_raw_tokens: u8,
-    },
-
-    Error {
-        msg: String,
-    },
-}
-
-
-pub(super) fn process<'a, S: Sink<'a>>(builder: &mut S, tokens: &[Token], mut events: Vec<Event>) {
-    fn tombstone() -> Event {
-        Event::Start { kind: TOMBSTONE, forward_parent: None }
-    }
-    let eat_ws = |idx: &mut usize, builder: &mut S| {
-        while let Some(token) = tokens.get(*idx) {
-            if !token.kind.is_trivia() {
-                break;
-            }
-            builder.leaf(token.kind, token.len);
-            *idx += 1
-        }
-    };
-
-    let events: &mut [Event] = &mut events;
-    let mut depth = 0;
-    let mut forward_parents = Vec::new();
-    let mut next_tok_idx = 0;
-    for i in 0..events.len() {
-        match mem::replace(&mut events[i], tombstone()) {
-            Event::Start {
-                kind: TOMBSTONE, ..
-            } => (),
-
-            Event::Start { kind, forward_parent } => {
-                forward_parents.push(kind);
-                let mut idx = i;
-                let mut fp = forward_parent;
-                while let Some(fwd) = fp {
-                    idx += fwd as usize;
-                    fp = match mem::replace(&mut events[idx], tombstone()) {
-                        Event::Start {
-                            kind,
-                            forward_parent,
-                        } => {
-                            forward_parents.push(kind);
-                            forward_parent
-                        },
-                        _ => unreachable!(),
-                    };
-                }
-                for kind in forward_parents.drain(..).rev() {
-                    if depth > 0 {
-                        eat_ws(&mut next_tok_idx, builder);
-                    }
-                    depth += 1;
-                    builder.start_internal(kind);
-                }
-            }
-            Event::Finish => {
-                depth -= 1;
-                if depth == 0 {
-                    eat_ws(&mut next_tok_idx, builder);
-                }
-
-                builder.finish_internal();
-            }
-            Event::Token {
-                kind,
-                mut n_raw_tokens,
-            } => {
-                eat_ws(&mut next_tok_idx, builder);
-                let mut len = 0.into();
-                for _ in 0..n_raw_tokens {
-                    len += tokens[next_tok_idx].len;
-                    next_tok_idx += 1;
-                }
-                builder.leaf(kind, len);
-            }
-            Event::Error { msg } => builder.error(msg),
-        }
-    }
-}
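
The deleted `process` function above is the heart of the event-to-tree conversion. As an illustration of the `forward_parent` mechanism, here is a minimal, self-contained sketch that re-implements the same resolution loop over simplified stand-in types (`Kind` and `Ev` are invented for this example, not the crate's `SyntaxKind` and `Event`; trivia handling via `eat_ws` is omitted):

// Stand-in types; illustrative only.
#[derive(Debug)]
enum Kind { Path, Ident, ColonColon, Tombstone }

enum Ev {
    Start { kind: Kind, forward_parent: Option<u32> },
    Finish,
    Token(Kind),
}

fn main() {
    use Ev::*;
    let tombstone = || Start { kind: Kind::Tombstone, forward_parent: None };
    // The event stream for `foo::bar`: the inner PATH is emitted first and
    // points forward (3 events ahead) to the outer PATH created by `precede`.
    let mut events = vec![
        Start { kind: Kind::Path, forward_parent: Some(3) },
        Token(Kind::Ident), // 'foo'
        Finish,
        Start { kind: Kind::Path, forward_parent: None },
        Token(Kind::ColonColon),
        Token(Kind::Ident), // 'bar'
        Finish,
    ];
    let (mut depth, mut forward_parents) = (0usize, Vec::new());
    for i in 0..events.len() {
        match std::mem::replace(&mut events[i], tombstone()) {
            Start { kind: Kind::Tombstone, .. } => (),
            Start { kind, forward_parent } => {
                // Collect the chain of forward parents, then open the
                // outermost node first.
                forward_parents.push(kind);
                let (mut idx, mut fp) = (i, forward_parent);
                while let Some(fwd) = fp {
                    idx += fwd as usize;
                    fp = match std::mem::replace(&mut events[idx], tombstone()) {
                        Start { kind, forward_parent } => {
                            forward_parents.push(kind);
                            forward_parent
                        }
                        _ => unreachable!(),
                    };
                }
                for kind in forward_parents.drain(..).rev() {
                    println!("{:2$}start {:?}", "", kind, depth * 2);
                    depth += 1;
                }
            }
            Finish => {
                depth -= 1;
                println!("{:1$}finish", "", depth * 2);
            }
            Token(kind) => println!("{:2$}token {:?}", "", kind, depth * 2),
        }
    }
}

Running this prints the outer Path wrapping the inner Path ('foo'), followed by '::' and 'bar' as siblings, matching the tree diagram in the doc comment above.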
diff --git a/src/parser_impl/input.rs b/src/parser_impl/input.rs
deleted file mode 100644
index c0fe4d488..000000000
--- a/src/parser_impl/input.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};
-
-use std::ops::{Add, AddAssign};
-
-pub(crate) struct ParserInput<'t> {
-    text: &'t str,
-    start_offsets: Vec<TextUnit>,
-    tokens: Vec<Token>, // non-whitespace tokens
-}
-
-impl<'t> ParserInput<'t> {
-    pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
-        let mut tokens = Vec::new();
-        let mut start_offsets = Vec::new();
-        let mut len = 0.into();
-        for &token in raw_tokens.iter() {
-            if !token.kind.is_trivia() {
-                tokens.push(token);
-                start_offsets.push(len);
-            }
-            len += token.len;
-        }
-
-        ParserInput {
-            text,
-            start_offsets,
-            tokens,
-        }
-    }
-
-    pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return EOF;
-        }
-        self.tokens[idx].kind
-    }
-
-    pub fn len(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.tokens[idx].len
-    }
-
-    pub fn start(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.start_offsets[idx]
-    }
-
-    pub fn text(&self, pos: InputPosition) -> &'t str {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return "";
-        }
-        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
-        &self.text[range]
-    }
-}
-
-#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
-pub(crate) struct InputPosition(u32);
-
-impl InputPosition {
-    pub fn new() -> Self {
-        InputPosition(0)
-    }
-}
-
-impl Add<u32> for InputPosition {
-    type Output = InputPosition;
-
-    fn add(self, rhs: u32) -> InputPosition {
-        InputPosition(self.0 + rhs)
-    }
-}
-
-impl AddAssign<u32> for InputPosition {
-    fn add_assign(&mut self, rhs: u32) {
-        self.0 += rhs
-    }
-}
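
For a concrete sense of what `ParserInput` does, here is a small self-contained sketch of the same trivia filtering, using hypothetical `Kind`/`Token` stand-ins rather than the crate's `SyntaxKind` and `lexer::Token`: non-trivia tokens are indexed densely for the parser, while the recorded start offsets still account for the whitespace in between, so `text(pos)` can slice the original string.

// Stand-in types; illustrative only.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind { Ident, ColonColon, Whitespace }

impl Kind {
    fn is_trivia(self) -> bool { self == Kind::Whitespace }
}

#[derive(Clone, Copy)]
struct Token { kind: Kind, len: usize }

fn main() {
    let text = "foo ::bar";
    // What a lexer might produce for `foo ::bar`:
    let raw = [
        Token { kind: Kind::Ident, len: 3 },      // "foo"
        Token { kind: Kind::Whitespace, len: 1 }, // " "
        Token { kind: Kind::ColonColon, len: 2 }, // "::"
        Token { kind: Kind::Ident, len: 3 },      // "bar"
    ];

    // The same filtering as ParserInput::new: keep only non-trivia tokens,
    // remembering where each one starts in the original text.
    let mut tokens = Vec::new();
    let mut start_offsets = Vec::new();
    let mut len = 0;
    for &t in raw.iter() {
        if !t.kind.is_trivia() {
            tokens.push(t);
            start_offsets.push(len);
        }
        len += t.len;
    }

    // Position 1 is `::`, even though whitespace precedes it in the text.
    let pos = 1;
    let (start, tok) = (start_offsets[pos], tokens[pos]);
    println!("{:?} -> {:?}", tok.kind, &text[start..start + tok.len]);
    // prints: ColonColon -> "::"
}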
diff --git a/src/parser_impl/mod.rs b/src/parser_impl/mod.rs
deleted file mode 100644
index 06c16cdb4..000000000
--- a/src/parser_impl/mod.rs
+++ /dev/null
@@ -1,170 +0,0 @@
-mod event;
-mod input;
-
-use {
-    grammar,
-    lexer::Token,
-    parser_api::Parser,
-    parser_impl::{
-        event::{process, Event},
-        input::{InputPosition, ParserInput},
-    },
-    TextUnit,
-};
-
-use SyntaxKind::{self, EOF, TOMBSTONE};
-
-pub(crate) trait Sink<'a> {
-    type Tree;
-
-    fn new(text: &'a str) -> Self;
-
-    fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
-    fn start_internal(&mut self, kind: SyntaxKind);
-    fn finish_internal(&mut self);
-    fn error(&mut self, err: String);
-    fn finish(self) -> Self::Tree;
-}
-
-/// Parse a sequence of tokens into the representative node tree
-pub(crate) fn parse<'a, S: Sink<'a>>(text: &'a str, tokens: &[Token]) -> S::Tree {
-    let events = {
-        let input = input::ParserInput::new(text, tokens);
-        let parser_impl = ParserImpl::new(&input);
-        let mut parser_api = Parser(parser_impl);
-        grammar::file(&mut parser_api);
-        parser_api.0.into_events()
-    };
-    let mut sink = S::new(text);
-    process(&mut sink, tokens, events);
-    sink.finish()
-}
-
-/// Implementation details of `Parser`, extracted
-/// to a separate struct in order not to pollute
-/// the public API of the `Parser`.
-pub(crate) struct ParserImpl<'t> {
-    inp: &'t ParserInput<'t>,
-
-    pos: InputPosition,
-    events: Vec<Event>,
-}
-
-impl<'t> ParserImpl<'t> {
-    pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
-        ParserImpl {
-            inp,
-
-            pos: InputPosition::new(),
-            events: Vec::new(),
-        }
-    }
-
-    pub(crate) fn into_events(self) -> Vec<Event> {
-        assert_eq!(self.nth(0), EOF);
-        self.events
-    }
-
-    pub(super) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool {
-        self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2
-            && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
-    }
-
-    pub(super) fn at_compound3(&self, c1: SyntaxKind, c2: SyntaxKind, c3: SyntaxKind) -> bool {
-        self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2 && self.inp.kind(self.pos + 2) == c3
-            && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
-            && self.inp.start(self.pos + 2) == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1)
-    }
-
-    pub(super) fn nth(&self, n: u32) -> SyntaxKind {
-        self.inp.kind(self.pos + n)
-    }
-
-    pub(super) fn at_kw(&self, t: &str) -> bool {
-        self.inp.text(self.pos) == t
-    }
-
-    pub(super) fn start(&mut self) -> u32 {
-        let pos = self.events.len() as u32;
-        self.event(Event::Start {
-            kind: TOMBSTONE,
-            forward_parent: None,
-        });
-        pos
-    }
-
-    pub(super) fn bump(&mut self) {
-        let kind = self.nth(0);
-        if kind == EOF {
-            return;
-        }
-        self.do_bump(kind, 1);
-    }
-
-    pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
-        if self.nth(0) == EOF {
-            // TODO: panic!?
-            return;
-        }
-        self.do_bump(kind, 1);
-    }
-
-    pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
-        self.do_bump(kind, n);
-    }
-
-    fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
-        self.pos += u32::from(n_raw_tokens);
-        self.event(Event::Token {
-            kind,
-            n_raw_tokens,
-        });
-    }
-
-    pub(super) fn error(&mut self, msg: String) {
-        self.event(Event::Error { msg })
-    }
-
-    pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
-        match self.events[pos as usize] {
-            Event::Start {
-                kind: ref mut slot, ..
-            } => {
-                *slot = kind;
-            }
-            _ => unreachable!(),
-        }
-        self.event(Event::Finish);
-    }
-
-    pub(super) fn abandon(&mut self, pos: u32) {
-        let idx = pos as usize;
-        if idx == self.events.len() - 1 {
-            match self.events.pop() {
-                Some(Event::Start {
-                    kind: TOMBSTONE,
-                    forward_parent: None,
-                }) => (),
-                _ => unreachable!(),
-            }
-        }
-    }
-
-    pub(super) fn precede(&mut self, pos: u32) -> u32 {
-        let new_pos = self.start();
-        match self.events[pos as usize] {
-            Event::Start {
-                ref mut forward_parent,
-                ..
-            } => {
-                *forward_parent = Some(new_pos - pos);
-            }
-            _ => unreachable!(),
-        }
-        new_pos
-    }
-
-    fn event(&mut self, event: Event) {
-        self.events.push(event)
-    }
-}
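
To make the `Sink` contract concrete, here is a hypothetical implementation sketched against the trait above. `SyntaxKind` and `TextUnit` are replaced with simple stand-ins so the example compiles on its own; treat it as an illustration of the shape of a sink rather than a drop-in for the crate. With the real types, such a sink would be selected as the `S` parameter of `parse`, roughly as `parse::<TraceSink>(text, &tokens)`.

// Stand-ins for the crate's real types; illustrative only.
type SyntaxKind = &'static str;
type TextUnit = u32;

// The trait as declared in mod.rs above, repeated here so the sketch is
// self-contained.
trait Sink<'a> {
    type Tree;

    fn new(text: &'a str) -> Self;

    fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
    fn start_internal(&mut self, kind: SyntaxKind);
    fn finish_internal(&mut self);
    fn error(&mut self, err: String);
    fn finish(self) -> Self::Tree;
}

// A sink that renders the node structure as an indented trace instead of
// building a real tree.
struct TraceSink<'a> {
    _text: &'a str, // a real sink would slice this by accumulated offsets
    depth: usize,
    out: String,
}

impl<'a> Sink<'a> for TraceSink<'a> {
    type Tree = String;

    fn new(text: &'a str) -> Self {
        TraceSink { _text: text, depth: 0, out: String::new() }
    }
    fn leaf(&mut self, kind: SyntaxKind, len: TextUnit) {
        self.out.push_str(&format!("{}{} (len {})\n", "  ".repeat(self.depth), kind, len));
    }
    fn start_internal(&mut self, kind: SyntaxKind) {
        self.out.push_str(&format!("{}{}\n", "  ".repeat(self.depth), kind));
        self.depth += 1;
    }
    fn finish_internal(&mut self) {
        self.depth -= 1;
    }
    fn error(&mut self, err: String) {
        self.out.push_str(&format!("error: {}\n", err));
    }
    fn finish(self) -> String {
        self.out
    }
}

fn main() {
    // Drive the sink by hand with the `foo::bar` events from event.rs above.
    let mut s = TraceSink::new("foo::bar");
    s.start_internal("PATH");
    s.start_internal("PATH");
    s.leaf("IDENT", 3);
    s.finish_internal();
    s.leaf("COLONCOLON", 2);
    s.leaf("IDENT", 3);
    s.finish_internal();
    print!("{}", s.finish());
}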