diff options
author | Aleksey Kladov <[email protected]> | 2018-07-31 21:38:19 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-07-31 21:44:31 +0100 |
commit | 7912189ec304b28c4df0030b5282cf3d21074154 (patch) | |
tree | 03a0a1b128439fdefbd1d012b392995ca8a6e264 /src/parser_impl | |
parent | d1400e95d7ad701fcba1191cb00968c2eae8b394 (diff) |
reorganize
Diffstat (limited to 'src/parser_impl')
-rw-r--r-- | src/parser_impl/event.rs | 156 | ||||
-rw-r--r-- | src/parser_impl/input.rs | 71 | ||||
-rw-r--r-- | src/parser_impl/mod.rs | 155 |
3 files changed, 382 insertions, 0 deletions
diff --git a/src/parser_impl/event.rs b/src/parser_impl/event.rs new file mode 100644 index 000000000..eb5d0a4be --- /dev/null +++ b/src/parser_impl/event.rs | |||
@@ -0,0 +1,156 @@ | |||
1 | //! This module provides a way to construct a `File`. | ||
2 | //! It is intended to be completely decoupled from the | ||
3 | //! parser, so as to allow to evolve the tree representation | ||
4 | //! and the parser algorithm independently. | ||
5 | //! | ||
6 | //! The `Sink` trait is the bridge between the parser and the | ||
7 | //! tree builder: the parser produces a stream of events like | ||
8 | //! `start node`, `finish node`, and `FileBuilder` converts | ||
9 | //! this stream to a real tree. | ||
10 | use { | ||
11 | lexer::Token, | ||
12 | parser_impl::Sink, | ||
13 | SyntaxKind::{self, TOMBSTONE}, | ||
14 | }; | ||
15 | |||
16 | /// `Parser` produces a flat list of `Event`s. | ||
17 | /// They are converted to a tree-structure in | ||
18 | /// a separate pass, via `TreeBuilder`. | ||
19 | #[derive(Debug)] | ||
20 | pub(crate) enum Event { | ||
21 | /// This event signifies the start of the node. | ||
22 | /// It should be either abandoned (in which case the | ||
23 | /// `kind` is `TOMBSTONE`, and the event is ignored), | ||
24 | /// or completed via a `Finish` event. | ||
25 | /// | ||
26 | /// All tokens between a `Start` and a `Finish` would | ||
27 | /// become the children of the respective node. | ||
28 | /// | ||
29 | /// For left-recursive syntactic constructs, the parser produces | ||
30 | /// a child node before it sees a parent. `forward_parent` | ||
31 | /// exists to allow to tweak parent-child relationships. | ||
32 | /// | ||
33 | /// Consider this path | ||
34 | /// | ||
35 | /// foo::bar | ||
36 | /// | ||
37 | /// The events for it would look like this: | ||
38 | /// | ||
39 | /// | ||
40 | /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH | ||
41 | /// | /\ | ||
42 | /// | | | ||
43 | /// +------forward-parent------+ | ||
44 | /// | ||
45 | /// And the tree would look like this | ||
46 | /// | ||
47 | /// +--PATH---------+ | ||
48 | /// | | | | ||
49 | /// | | | | ||
50 | /// | '::' 'bar' | ||
51 | /// | | ||
52 | /// PATH | ||
53 | /// | | ||
54 | /// 'foo' | ||
55 | /// | ||
56 | /// See also `CompletedMarker::precede`. | ||
57 | Start { | ||
58 | kind: SyntaxKind, | ||
59 | forward_parent: Option<u32>, | ||
60 | }, | ||
61 | |||
62 | /// Complete the previous `Start` event | ||
63 | Finish, | ||
64 | |||
65 | /// Produce a single leaf-element. | ||
66 | /// `n_raw_tokens` is used to glue complex contextual tokens. | ||
67 | /// For example, lexer tokenizes `>>` as `>`, `>`, and | ||
68 | /// `n_raw_tokens = 2` is used to produced a single `>>`. | ||
69 | Token { | ||
70 | kind: SyntaxKind, | ||
71 | n_raw_tokens: u8, | ||
72 | }, | ||
73 | |||
74 | Error { | ||
75 | msg: String, | ||
76 | }, | ||
77 | } | ||
78 | |||
79 | pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec<Event>) { | ||
80 | let mut idx = 0; | ||
81 | |||
82 | let mut holes = Vec::new(); | ||
83 | let mut forward_parents = Vec::new(); | ||
84 | |||
85 | for (i, event) in events.iter().enumerate() { | ||
86 | if holes.last() == Some(&i) { | ||
87 | holes.pop(); | ||
88 | continue; | ||
89 | } | ||
90 | |||
91 | match event { | ||
92 | &Event::Start { | ||
93 | kind: TOMBSTONE, .. | ||
94 | } => (), | ||
95 | |||
96 | &Event::Start { .. } => { | ||
97 | forward_parents.clear(); | ||
98 | let mut idx = i; | ||
99 | loop { | ||
100 | let (kind, fwd) = match events[idx] { | ||
101 | Event::Start { | ||
102 | kind, | ||
103 | forward_parent, | ||
104 | } => (kind, forward_parent), | ||
105 | _ => unreachable!(), | ||
106 | }; | ||
107 | forward_parents.push((idx, kind)); | ||
108 | if let Some(fwd) = fwd { | ||
109 | idx += fwd as usize; | ||
110 | } else { | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | for &(idx, kind) in forward_parents.iter().into_iter().rev() { | ||
115 | builder.start_internal(kind); | ||
116 | holes.push(idx); | ||
117 | } | ||
118 | holes.pop(); | ||
119 | } | ||
120 | &Event::Finish => { | ||
121 | while idx < tokens.len() { | ||
122 | let token = tokens[idx]; | ||
123 | if token.kind.is_trivia() { | ||
124 | idx += 1; | ||
125 | builder.leaf(token.kind, token.len); | ||
126 | } else { | ||
127 | break; | ||
128 | } | ||
129 | } | ||
130 | builder.finish_internal() | ||
131 | } | ||
132 | &Event::Token { | ||
133 | kind, | ||
134 | mut n_raw_tokens, | ||
135 | } => { | ||
136 | // FIXME: currently, we attach whitespace to some random node | ||
137 | // this should be done in a sensible manner instead | ||
138 | loop { | ||
139 | let token = tokens[idx]; | ||
140 | if !token.kind.is_trivia() { | ||
141 | break; | ||
142 | } | ||
143 | builder.leaf(token.kind, token.len); | ||
144 | idx += 1 | ||
145 | } | ||
146 | let mut len = 0.into(); | ||
147 | for _ in 0..n_raw_tokens { | ||
148 | len += tokens[idx].len; | ||
149 | idx += 1; | ||
150 | } | ||
151 | builder.leaf(kind, len); | ||
152 | } | ||
153 | &Event::Error { ref msg } => builder.error(msg.clone()), | ||
154 | } | ||
155 | } | ||
156 | } | ||
diff --git a/src/parser_impl/input.rs b/src/parser_impl/input.rs new file mode 100644 index 000000000..db76364b2 --- /dev/null +++ b/src/parser_impl/input.rs | |||
@@ -0,0 +1,71 @@ | |||
1 | use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit}; | ||
2 | |||
3 | use std::ops::{Add, AddAssign}; | ||
4 | |||
5 | pub(crate) struct ParserInput<'t> { | ||
6 | text: &'t str, | ||
7 | start_offsets: Vec<TextUnit>, | ||
8 | tokens: Vec<Token>, // non-whitespace tokens | ||
9 | } | ||
10 | |||
11 | impl<'t> ParserInput<'t> { | ||
12 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
13 | let mut tokens = Vec::new(); | ||
14 | let mut start_offsets = Vec::new(); | ||
15 | let mut len = 0.into(); | ||
16 | for &token in raw_tokens.iter() { | ||
17 | if !token.kind.is_trivia() { | ||
18 | tokens.push(token); | ||
19 | start_offsets.push(len); | ||
20 | } | ||
21 | len += token.len; | ||
22 | } | ||
23 | |||
24 | ParserInput { | ||
25 | text, | ||
26 | start_offsets, | ||
27 | tokens, | ||
28 | } | ||
29 | } | ||
30 | |||
31 | pub fn kind(&self, pos: InputPosition) -> SyntaxKind { | ||
32 | let idx = pos.0 as usize; | ||
33 | if !(idx < self.tokens.len()) { | ||
34 | return EOF; | ||
35 | } | ||
36 | self.tokens[idx].kind | ||
37 | } | ||
38 | |||
39 | #[allow(unused)] | ||
40 | pub fn text(&self, pos: InputPosition) -> &'t str { | ||
41 | let idx = pos.0 as usize; | ||
42 | if !(idx < self.tokens.len()) { | ||
43 | return ""; | ||
44 | } | ||
45 | let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); | ||
46 | &self.text[range] | ||
47 | } | ||
48 | } | ||
49 | |||
50 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] | ||
51 | pub(crate) struct InputPosition(u32); | ||
52 | |||
53 | impl InputPosition { | ||
54 | pub fn new() -> Self { | ||
55 | InputPosition(0) | ||
56 | } | ||
57 | } | ||
58 | |||
59 | impl Add<u32> for InputPosition { | ||
60 | type Output = InputPosition; | ||
61 | |||
62 | fn add(self, rhs: u32) -> InputPosition { | ||
63 | InputPosition(self.0 + rhs) | ||
64 | } | ||
65 | } | ||
66 | |||
67 | impl AddAssign<u32> for InputPosition { | ||
68 | fn add_assign(&mut self, rhs: u32) { | ||
69 | self.0 += rhs | ||
70 | } | ||
71 | } | ||
diff --git a/src/parser_impl/mod.rs b/src/parser_impl/mod.rs new file mode 100644 index 000000000..b58094be3 --- /dev/null +++ b/src/parser_impl/mod.rs | |||
@@ -0,0 +1,155 @@ | |||
1 | mod event; | ||
2 | mod input; | ||
3 | |||
4 | use { | ||
5 | grammar, | ||
6 | lexer::Token, | ||
7 | parser_api::Parser, | ||
8 | parser_impl::{ | ||
9 | event::{process, Event}, | ||
10 | input::{InputPosition, ParserInput}, | ||
11 | }, | ||
12 | TextUnit, | ||
13 | }; | ||
14 | |||
15 | use SyntaxKind::{self, EOF, TOMBSTONE}; | ||
16 | |||
17 | pub(crate) trait Sink { | ||
18 | type Tree; | ||
19 | |||
20 | fn new(text: String) -> Self; | ||
21 | |||
22 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); | ||
23 | fn start_internal(&mut self, kind: SyntaxKind); | ||
24 | fn finish_internal(&mut self); | ||
25 | fn error(&mut self, err: String); | ||
26 | fn finish(self) -> Self::Tree; | ||
27 | } | ||
28 | |||
29 | /// Parse a sequence of tokens into the representative node tree | ||
30 | pub(crate) fn parse<S: Sink>(text: String, tokens: &[Token]) -> S::Tree { | ||
31 | let events = { | ||
32 | let input = input::ParserInput::new(&text, tokens); | ||
33 | let parser_impl = ParserImpl::new(&input); | ||
34 | let mut parser_api = Parser(parser_impl); | ||
35 | grammar::file(&mut parser_api); | ||
36 | parser_api.0.into_events() | ||
37 | }; | ||
38 | let mut sink = S::new(text); | ||
39 | process(&mut sink, tokens, events); | ||
40 | sink.finish() | ||
41 | } | ||
42 | |||
43 | /// Implementation details of `Parser`, extracted | ||
44 | /// to a separate struct in order not to pollute | ||
45 | /// the public API of the `Parser`. | ||
46 | pub(crate) struct ParserImpl<'t> { | ||
47 | inp: &'t ParserInput<'t>, | ||
48 | |||
49 | pos: InputPosition, | ||
50 | events: Vec<Event>, | ||
51 | } | ||
52 | |||
53 | impl<'t> ParserImpl<'t> { | ||
54 | pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { | ||
55 | ParserImpl { | ||
56 | inp, | ||
57 | |||
58 | pos: InputPosition::new(), | ||
59 | events: Vec::new(), | ||
60 | } | ||
61 | } | ||
62 | |||
63 | pub(crate) fn into_events(self) -> Vec<Event> { | ||
64 | assert_eq!(self.nth(0), EOF); | ||
65 | self.events | ||
66 | } | ||
67 | |||
68 | pub(super) fn nth(&self, n: u32) -> SyntaxKind { | ||
69 | self.inp.kind(self.pos + n) | ||
70 | } | ||
71 | |||
72 | pub(super) fn at_kw(&self, t: &str) -> bool { | ||
73 | self.inp.text(self.pos) == t | ||
74 | } | ||
75 | |||
76 | pub(super) fn start(&mut self) -> u32 { | ||
77 | let pos = self.events.len() as u32; | ||
78 | self.event(Event::Start { | ||
79 | kind: TOMBSTONE, | ||
80 | forward_parent: None, | ||
81 | }); | ||
82 | pos | ||
83 | } | ||
84 | |||
85 | pub(super) fn bump(&mut self) { | ||
86 | let kind = self.nth(0); | ||
87 | if kind == EOF { | ||
88 | return; | ||
89 | } | ||
90 | self.do_bump(kind); | ||
91 | } | ||
92 | |||
93 | pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
94 | if self.nth(0) == EOF { | ||
95 | // TODO: panic!? | ||
96 | return; | ||
97 | } | ||
98 | self.do_bump(kind); | ||
99 | } | ||
100 | |||
101 | fn do_bump(&mut self, kind: SyntaxKind) { | ||
102 | self.pos += 1; | ||
103 | self.event(Event::Token { | ||
104 | kind, | ||
105 | n_raw_tokens: 1, | ||
106 | }); | ||
107 | } | ||
108 | |||
109 | pub(super) fn error(&mut self, msg: String) { | ||
110 | self.event(Event::Error { msg }) | ||
111 | } | ||
112 | |||
113 | pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { | ||
114 | match self.events[pos as usize] { | ||
115 | Event::Start { | ||
116 | kind: ref mut slot, .. | ||
117 | } => { | ||
118 | *slot = kind; | ||
119 | } | ||
120 | _ => unreachable!(), | ||
121 | } | ||
122 | self.event(Event::Finish); | ||
123 | } | ||
124 | |||
125 | pub(super) fn abandon(&mut self, pos: u32) { | ||
126 | let idx = pos as usize; | ||
127 | if idx == self.events.len() - 1 { | ||
128 | match self.events.pop() { | ||
129 | Some(Event::Start { | ||
130 | kind: TOMBSTONE, | ||
131 | forward_parent: None, | ||
132 | }) => (), | ||
133 | _ => unreachable!(), | ||
134 | } | ||
135 | } | ||
136 | } | ||
137 | |||
138 | pub(super) fn precede(&mut self, pos: u32) -> u32 { | ||
139 | let new_pos = self.start(); | ||
140 | match self.events[pos as usize] { | ||
141 | Event::Start { | ||
142 | ref mut forward_parent, | ||
143 | .. | ||
144 | } => { | ||
145 | *forward_parent = Some(new_pos - pos); | ||
146 | } | ||
147 | _ => unreachable!(), | ||
148 | } | ||
149 | new_pos | ||
150 | } | ||
151 | |||
152 | fn event(&mut self, event: Event) { | ||
153 | self.events.push(event) | ||
154 | } | ||
155 | } | ||