aboutsummaryrefslogtreecommitdiff
path: root/src/parser_impl
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2018-07-31 21:38:19 +0100
committerAleksey Kladov <[email protected]>2018-07-31 21:44:31 +0100
commit7912189ec304b28c4df0030b5282cf3d21074154 (patch)
tree03a0a1b128439fdefbd1d012b392995ca8a6e264 /src/parser_impl
parentd1400e95d7ad701fcba1191cb00968c2eae8b394 (diff)
reorganize
Diffstat (limited to 'src/parser_impl')
-rw-r--r--src/parser_impl/event.rs156
-rw-r--r--src/parser_impl/input.rs71
-rw-r--r--src/parser_impl/mod.rs155
3 files changed, 382 insertions, 0 deletions
diff --git a/src/parser_impl/event.rs b/src/parser_impl/event.rs
new file mode 100644
index 000000000..eb5d0a4be
--- /dev/null
+++ b/src/parser_impl/event.rs
@@ -0,0 +1,156 @@
1//! This module provides a way to construct a `File`.
2//! It is intended to be completely decoupled from the
3//! parser, so as to allow to evolve the tree representation
4//! and the parser algorithm independently.
5//!
6//! The `Sink` trait is the bridge between the parser and the
7//! tree builder: the parser produces a stream of events like
8//! `start node`, `finish node`, and `FileBuilder` converts
9//! this stream to a real tree.
10use {
11 lexer::Token,
12 parser_impl::Sink,
13 SyntaxKind::{self, TOMBSTONE},
14};
15
16/// `Parser` produces a flat list of `Event`s.
17/// They are converted to a tree-structure in
18/// a separate pass, via `TreeBuilder`.
19#[derive(Debug)]
20pub(crate) enum Event {
21 /// This event signifies the start of the node.
22 /// It should be either abandoned (in which case the
23 /// `kind` is `TOMBSTONE`, and the event is ignored),
24 /// or completed via a `Finish` event.
25 ///
26 /// All tokens between a `Start` and a `Finish` would
27 /// become the children of the respective node.
28 ///
29 /// For left-recursive syntactic constructs, the parser produces
30 /// a child node before it sees a parent. `forward_parent`
31 /// exists to allow to tweak parent-child relationships.
32 ///
33 /// Consider this path
34 ///
35 /// foo::bar
36 ///
37 /// The events for it would look like this:
38 ///
39 ///
40 /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
41 /// | /\
42 /// | |
43 /// +------forward-parent------+
44 ///
45 /// And the tree would look like this
46 ///
47 /// +--PATH---------+
48 /// | | |
49 /// | | |
50 /// | '::' 'bar'
51 /// |
52 /// PATH
53 /// |
54 /// 'foo'
55 ///
56 /// See also `CompletedMarker::precede`.
57 Start {
58 kind: SyntaxKind,
59 forward_parent: Option<u32>,
60 },
61
62 /// Complete the previous `Start` event
63 Finish,
64
65 /// Produce a single leaf-element.
66 /// `n_raw_tokens` is used to glue complex contextual tokens.
67 /// For example, lexer tokenizes `>>` as `>`, `>`, and
68 /// `n_raw_tokens = 2` is used to produced a single `>>`.
69 Token {
70 kind: SyntaxKind,
71 n_raw_tokens: u8,
72 },
73
74 Error {
75 msg: String,
76 },
77}
78
79pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec<Event>) {
80 let mut idx = 0;
81
82 let mut holes = Vec::new();
83 let mut forward_parents = Vec::new();
84
85 for (i, event) in events.iter().enumerate() {
86 if holes.last() == Some(&i) {
87 holes.pop();
88 continue;
89 }
90
91 match event {
92 &Event::Start {
93 kind: TOMBSTONE, ..
94 } => (),
95
96 &Event::Start { .. } => {
97 forward_parents.clear();
98 let mut idx = i;
99 loop {
100 let (kind, fwd) = match events[idx] {
101 Event::Start {
102 kind,
103 forward_parent,
104 } => (kind, forward_parent),
105 _ => unreachable!(),
106 };
107 forward_parents.push((idx, kind));
108 if let Some(fwd) = fwd {
109 idx += fwd as usize;
110 } else {
111 break;
112 }
113 }
114 for &(idx, kind) in forward_parents.iter().into_iter().rev() {
115 builder.start_internal(kind);
116 holes.push(idx);
117 }
118 holes.pop();
119 }
120 &Event::Finish => {
121 while idx < tokens.len() {
122 let token = tokens[idx];
123 if token.kind.is_trivia() {
124 idx += 1;
125 builder.leaf(token.kind, token.len);
126 } else {
127 break;
128 }
129 }
130 builder.finish_internal()
131 }
132 &Event::Token {
133 kind,
134 mut n_raw_tokens,
135 } => {
136 // FIXME: currently, we attach whitespace to some random node
137 // this should be done in a sensible manner instead
138 loop {
139 let token = tokens[idx];
140 if !token.kind.is_trivia() {
141 break;
142 }
143 builder.leaf(token.kind, token.len);
144 idx += 1
145 }
146 let mut len = 0.into();
147 for _ in 0..n_raw_tokens {
148 len += tokens[idx].len;
149 idx += 1;
150 }
151 builder.leaf(kind, len);
152 }
153 &Event::Error { ref msg } => builder.error(msg.clone()),
154 }
155 }
156}
diff --git a/src/parser_impl/input.rs b/src/parser_impl/input.rs
new file mode 100644
index 000000000..db76364b2
--- /dev/null
+++ b/src/parser_impl/input.rs
@@ -0,0 +1,71 @@
1use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};
2
3use std::ops::{Add, AddAssign};
4
5pub(crate) struct ParserInput<'t> {
6 text: &'t str,
7 start_offsets: Vec<TextUnit>,
8 tokens: Vec<Token>, // non-whitespace tokens
9}
10
11impl<'t> ParserInput<'t> {
12 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
13 let mut tokens = Vec::new();
14 let mut start_offsets = Vec::new();
15 let mut len = 0.into();
16 for &token in raw_tokens.iter() {
17 if !token.kind.is_trivia() {
18 tokens.push(token);
19 start_offsets.push(len);
20 }
21 len += token.len;
22 }
23
24 ParserInput {
25 text,
26 start_offsets,
27 tokens,
28 }
29 }
30
31 pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
32 let idx = pos.0 as usize;
33 if !(idx < self.tokens.len()) {
34 return EOF;
35 }
36 self.tokens[idx].kind
37 }
38
39 #[allow(unused)]
40 pub fn text(&self, pos: InputPosition) -> &'t str {
41 let idx = pos.0 as usize;
42 if !(idx < self.tokens.len()) {
43 return "";
44 }
45 let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
46 &self.text[range]
47 }
48}
49
50#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
51pub(crate) struct InputPosition(u32);
52
53impl InputPosition {
54 pub fn new() -> Self {
55 InputPosition(0)
56 }
57}
58
59impl Add<u32> for InputPosition {
60 type Output = InputPosition;
61
62 fn add(self, rhs: u32) -> InputPosition {
63 InputPosition(self.0 + rhs)
64 }
65}
66
67impl AddAssign<u32> for InputPosition {
68 fn add_assign(&mut self, rhs: u32) {
69 self.0 += rhs
70 }
71}
diff --git a/src/parser_impl/mod.rs b/src/parser_impl/mod.rs
new file mode 100644
index 000000000..b58094be3
--- /dev/null
+++ b/src/parser_impl/mod.rs
@@ -0,0 +1,155 @@
1mod event;
2mod input;
3
4use {
5 grammar,
6 lexer::Token,
7 parser_api::Parser,
8 parser_impl::{
9 event::{process, Event},
10 input::{InputPosition, ParserInput},
11 },
12 TextUnit,
13};
14
15use SyntaxKind::{self, EOF, TOMBSTONE};
16
17pub(crate) trait Sink {
18 type Tree;
19
20 fn new(text: String) -> Self;
21
22 fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
23 fn start_internal(&mut self, kind: SyntaxKind);
24 fn finish_internal(&mut self);
25 fn error(&mut self, err: String);
26 fn finish(self) -> Self::Tree;
27}
28
29/// Parse a sequence of tokens into the representative node tree
30pub(crate) fn parse<S: Sink>(text: String, tokens: &[Token]) -> S::Tree {
31 let events = {
32 let input = input::ParserInput::new(&text, tokens);
33 let parser_impl = ParserImpl::new(&input);
34 let mut parser_api = Parser(parser_impl);
35 grammar::file(&mut parser_api);
36 parser_api.0.into_events()
37 };
38 let mut sink = S::new(text);
39 process(&mut sink, tokens, events);
40 sink.finish()
41}
42
43/// Implementation details of `Parser`, extracted
44/// to a separate struct in order not to pollute
45/// the public API of the `Parser`.
46pub(crate) struct ParserImpl<'t> {
47 inp: &'t ParserInput<'t>,
48
49 pos: InputPosition,
50 events: Vec<Event>,
51}
52
53impl<'t> ParserImpl<'t> {
54 pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
55 ParserImpl {
56 inp,
57
58 pos: InputPosition::new(),
59 events: Vec::new(),
60 }
61 }
62
63 pub(crate) fn into_events(self) -> Vec<Event> {
64 assert_eq!(self.nth(0), EOF);
65 self.events
66 }
67
68 pub(super) fn nth(&self, n: u32) -> SyntaxKind {
69 self.inp.kind(self.pos + n)
70 }
71
72 pub(super) fn at_kw(&self, t: &str) -> bool {
73 self.inp.text(self.pos) == t
74 }
75
76 pub(super) fn start(&mut self) -> u32 {
77 let pos = self.events.len() as u32;
78 self.event(Event::Start {
79 kind: TOMBSTONE,
80 forward_parent: None,
81 });
82 pos
83 }
84
85 pub(super) fn bump(&mut self) {
86 let kind = self.nth(0);
87 if kind == EOF {
88 return;
89 }
90 self.do_bump(kind);
91 }
92
93 pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
94 if self.nth(0) == EOF {
95 // TODO: panic!?
96 return;
97 }
98 self.do_bump(kind);
99 }
100
101 fn do_bump(&mut self, kind: SyntaxKind) {
102 self.pos += 1;
103 self.event(Event::Token {
104 kind,
105 n_raw_tokens: 1,
106 });
107 }
108
109 pub(super) fn error(&mut self, msg: String) {
110 self.event(Event::Error { msg })
111 }
112
113 pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
114 match self.events[pos as usize] {
115 Event::Start {
116 kind: ref mut slot, ..
117 } => {
118 *slot = kind;
119 }
120 _ => unreachable!(),
121 }
122 self.event(Event::Finish);
123 }
124
125 pub(super) fn abandon(&mut self, pos: u32) {
126 let idx = pos as usize;
127 if idx == self.events.len() - 1 {
128 match self.events.pop() {
129 Some(Event::Start {
130 kind: TOMBSTONE,
131 forward_parent: None,
132 }) => (),
133 _ => unreachable!(),
134 }
135 }
136 }
137
138 pub(super) fn precede(&mut self, pos: u32) -> u32 {
139 let new_pos = self.start();
140 match self.events[pos as usize] {
141 Event::Start {
142 ref mut forward_parent,
143 ..
144 } => {
145 *forward_parent = Some(new_pos - pos);
146 }
147 _ => unreachable!(),
148 }
149 new_pos
150 }
151
152 fn event(&mut self, event: Event) {
153 self.events.push(event)
154 }
155}