aboutsummaryrefslogtreecommitdiff
path: root/crates/libsyntax2/src/parser_impl
diff options
context:
space:
mode:
Diffstat (limited to 'crates/libsyntax2/src/parser_impl')
-rw-r--r--crates/libsyntax2/src/parser_impl/event.rs154
-rw-r--r--crates/libsyntax2/src/parser_impl/input.rs86
-rw-r--r--crates/libsyntax2/src/parser_impl/mod.rs170
3 files changed, 410 insertions, 0 deletions
diff --git a/crates/libsyntax2/src/parser_impl/event.rs b/crates/libsyntax2/src/parser_impl/event.rs
new file mode 100644
index 000000000..9fd56b996
--- /dev/null
+++ b/crates/libsyntax2/src/parser_impl/event.rs
@@ -0,0 +1,154 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so that the tree representation and the
//! parser algorithm can evolve independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node` and `finish node`, and `FileBuilder` converts
//! this stream into a real tree.
10use std::mem;
11use {
12 lexer::Token,
13 parser_impl::Sink,
14 SyntaxKind::{self, TOMBSTONE},
15};
16
17
18/// `Parser` produces a flat list of `Event`s.
19/// They are converted to a tree-structure in
20/// a separate pass, via `TreeBuilder`.
21#[derive(Debug)]
22pub(crate) enum Event {
23 /// This event signifies the start of the node.
24 /// It should be either abandoned (in which case the
25 /// `kind` is `TOMBSTONE`, and the event is ignored),
26 /// or completed via a `Finish` event.
27 ///
28 /// All tokens between a `Start` and a `Finish` would
29 /// become the children of the respective node.
30 ///
31 /// For left-recursive syntactic constructs, the parser produces
32 /// a child node before it sees a parent. `forward_parent`
33 /// exists to allow to tweak parent-child relationships.
34 ///
35 /// Consider this path
36 ///
37 /// foo::bar
38 ///
39 /// The events for it would look like this:
40 ///
41 ///
42 /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
43 /// | /\
44 /// | |
45 /// +------forward-parent------+
46 ///
47 /// And the tree would look like this
48 ///
49 /// +--PATH---------+
50 /// | | |
51 /// | | |
52 /// | '::' 'bar'
53 /// |
54 /// PATH
55 /// |
56 /// 'foo'
57 ///
58 /// See also `CompletedMarker::precede`.
59 Start {
60 kind: SyntaxKind,
61 forward_parent: Option<u32>,
62 },
63
64 /// Complete the previous `Start` event
65 Finish,
66
67 /// Produce a single leaf-element.
68 /// `n_raw_tokens` is used to glue complex contextual tokens.
69 /// For example, lexer tokenizes `>>` as `>`, `>`, and
70 /// `n_raw_tokens = 2` is used to produced a single `>>`.
71 Token {
72 kind: SyntaxKind,
73 n_raw_tokens: u8,
74 },
75
76 Error {
77 msg: String,
78 },
79}
80
81
82pub(super) fn process<'a, S: Sink<'a>>(builder: &mut S, tokens: &[Token], mut events: Vec<Event>) {
83 fn tombstone() -> Event {
84 Event::Start { kind: TOMBSTONE, forward_parent: None }
85 }
86 let eat_ws = |idx: &mut usize, builder: &mut S| {
87 while let Some(token) = tokens.get(*idx) {
88 if !token.kind.is_trivia() {
89 break;
90 }
91 builder.leaf(token.kind, token.len);
92 *idx += 1
93 }
94 };
95
96 let events: &mut [Event] = &mut events;
97 let mut depth = 0;
98 let mut forward_parents = Vec::new();
99 let mut next_tok_idx = 0;
100 for i in 0..events.len() {
101 match mem::replace(&mut events[i], tombstone()) {
102 Event::Start {
103 kind: TOMBSTONE, ..
104 } => (),
105
106 Event::Start { kind, forward_parent } => {
107 forward_parents.push(kind);
108 let mut idx = i;
109 let mut fp = forward_parent;
110 while let Some(fwd) = fp {
111 idx += fwd as usize;
112 fp = match mem::replace(&mut events[idx], tombstone()) {
113 Event::Start {
114 kind,
115 forward_parent,
116 } => {
117 forward_parents.push(kind);
118 forward_parent
119 },
120 _ => unreachable!(),
121 };
122 }
123 for kind in forward_parents.drain(..).rev() {
124 if depth > 0 {
125 eat_ws(&mut next_tok_idx, builder);
126 }
127 depth += 1;
128 builder.start_internal(kind);
129 }
130 }
131 Event::Finish => {
132 depth -= 1;
133 if depth == 0 {
134 eat_ws(&mut next_tok_idx, builder);
135 }
136
137 builder.finish_internal();
138 }
139 Event::Token {
140 kind,
141 mut n_raw_tokens,
142 } => {
143 eat_ws(&mut next_tok_idx, builder);
144 let mut len = 0.into();
145 for _ in 0..n_raw_tokens {
146 len += tokens[next_tok_idx].len;
147 next_tok_idx += 1;
148 }
149 builder.leaf(kind, len);
150 }
151 Event::Error { msg } => builder.error(msg),
152 }
153 }
154}
diff --git a/crates/libsyntax2/src/parser_impl/input.rs b/crates/libsyntax2/src/parser_impl/input.rs
new file mode 100644
index 000000000..c0fe4d488
--- /dev/null
+++ b/crates/libsyntax2/src/parser_impl/input.rs
@@ -0,0 +1,86 @@
1use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};
2
3use std::ops::{Add, AddAssign};
4
5pub(crate) struct ParserInput<'t> {
6 text: &'t str,
7 start_offsets: Vec<TextUnit>,
8 tokens: Vec<Token>, // non-whitespace tokens
9}
10
11impl<'t> ParserInput<'t> {
12 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
13 let mut tokens = Vec::new();
14 let mut start_offsets = Vec::new();
15 let mut len = 0.into();
16 for &token in raw_tokens.iter() {
17 if !token.kind.is_trivia() {
18 tokens.push(token);
19 start_offsets.push(len);
20 }
21 len += token.len;
22 }
23
24 ParserInput {
25 text,
26 start_offsets,
27 tokens,
28 }
29 }
30
31 pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
32 let idx = pos.0 as usize;
33 if !(idx < self.tokens.len()) {
34 return EOF;
35 }
36 self.tokens[idx].kind
37 }
38
39 pub fn len(&self, pos: InputPosition) -> TextUnit {
40 let idx = pos.0 as usize;
41 if !(idx < self.tokens.len()) {
42 return 0.into();
43 }
44 self.tokens[idx].len
45 }
46
47 pub fn start(&self, pos: InputPosition) -> TextUnit {
48 let idx = pos.0 as usize;
49 if !(idx < self.tokens.len()) {
50 return 0.into();
51 }
52 self.start_offsets[idx]
53 }
54
55 pub fn text(&self, pos: InputPosition) -> &'t str {
56 let idx = pos.0 as usize;
57 if !(idx < self.tokens.len()) {
58 return "";
59 }
60 let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
61 &self.text[range]
62 }
63}
64
/// Index of a token inside `ParserInput`.
///
/// Positions are ordered and can be advanced by adding a `u32`.
#[derive(Debug, Default, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct InputPosition(u32);

impl InputPosition {
    /// Position of the very first token.
    pub fn new() -> Self {
        InputPosition(0)
    }
}

impl Add<u32> for InputPosition {
    type Output = InputPosition;

    /// Returns the position `rhs` tokens after `self`.
    fn add(self, rhs: u32) -> InputPosition {
        InputPosition(self.0 + rhs)
    }
}

impl AddAssign<u32> for InputPosition {
    /// Advances the position by `rhs` tokens in place.
    fn add_assign(&mut self, rhs: u32) {
        self.0 += rhs
    }
}
diff --git a/crates/libsyntax2/src/parser_impl/mod.rs b/crates/libsyntax2/src/parser_impl/mod.rs
new file mode 100644
index 000000000..06c16cdb4
--- /dev/null
+++ b/crates/libsyntax2/src/parser_impl/mod.rs
@@ -0,0 +1,170 @@
1mod event;
2mod input;
3
4use {
5 grammar,
6 lexer::Token,
7 parser_api::Parser,
8 parser_impl::{
9 event::{process, Event},
10 input::{InputPosition, ParserInput},
11 },
12 TextUnit,
13};
14
15use SyntaxKind::{self, EOF, TOMBSTONE};
16
17pub(crate) trait Sink<'a> {
18 type Tree;
19
20 fn new(text: &'a str) -> Self;
21
22 fn leaf(&mut self, kind: SyntaxKind, len: TextUnit);
23 fn start_internal(&mut self, kind: SyntaxKind);
24 fn finish_internal(&mut self);
25 fn error(&mut self, err: String);
26 fn finish(self) -> Self::Tree;
27}
28
29/// Parse a sequence of tokens into the representative node tree
30pub(crate) fn parse<'a, S: Sink<'a>>(text: &'a str, tokens: &[Token]) -> S::Tree {
31 let events = {
32 let input = input::ParserInput::new(text, tokens);
33 let parser_impl = ParserImpl::new(&input);
34 let mut parser_api = Parser(parser_impl);
35 grammar::file(&mut parser_api);
36 parser_api.0.into_events()
37 };
38 let mut sink = S::new(text);
39 process(&mut sink, tokens, events);
40 sink.finish()
41}
42
43/// Implementation details of `Parser`, extracted
44/// to a separate struct in order not to pollute
45/// the public API of the `Parser`.
46pub(crate) struct ParserImpl<'t> {
47 inp: &'t ParserInput<'t>,
48
49 pos: InputPosition,
50 events: Vec<Event>,
51}
52
53impl<'t> ParserImpl<'t> {
54 pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
55 ParserImpl {
56 inp,
57
58 pos: InputPosition::new(),
59 events: Vec::new(),
60 }
61 }
62
63 pub(crate) fn into_events(self) -> Vec<Event> {
64 assert_eq!(self.nth(0), EOF);
65 self.events
66 }
67
68 pub(super) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool {
69 self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2
70 && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
71 }
72
73 pub(super) fn at_compound3(&self, c1: SyntaxKind, c2: SyntaxKind, c3: SyntaxKind) -> bool {
74 self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2 && self.inp.kind(self.pos + 2) == c3
75 && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos)
76 && self.inp.start(self.pos + 2) == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1)
77 }
78
79 pub(super) fn nth(&self, n: u32) -> SyntaxKind {
80 self.inp.kind(self.pos + n)
81 }
82
83 pub(super) fn at_kw(&self, t: &str) -> bool {
84 self.inp.text(self.pos) == t
85 }
86
87 pub(super) fn start(&mut self) -> u32 {
88 let pos = self.events.len() as u32;
89 self.event(Event::Start {
90 kind: TOMBSTONE,
91 forward_parent: None,
92 });
93 pos
94 }
95
96 pub(super) fn bump(&mut self) {
97 let kind = self.nth(0);
98 if kind == EOF {
99 return;
100 }
101 self.do_bump(kind, 1);
102 }
103
104 pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
105 if self.nth(0) == EOF {
106 // TODO: panic!?
107 return;
108 }
109 self.do_bump(kind, 1);
110 }
111
112 pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
113 self.do_bump(kind, n);
114 }
115
116 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
117 self.pos += u32::from(n_raw_tokens);
118 self.event(Event::Token {
119 kind,
120 n_raw_tokens,
121 });
122 }
123
124 pub(super) fn error(&mut self, msg: String) {
125 self.event(Event::Error { msg })
126 }
127
128 pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
129 match self.events[pos as usize] {
130 Event::Start {
131 kind: ref mut slot, ..
132 } => {
133 *slot = kind;
134 }
135 _ => unreachable!(),
136 }
137 self.event(Event::Finish);
138 }
139
140 pub(super) fn abandon(&mut self, pos: u32) {
141 let idx = pos as usize;
142 if idx == self.events.len() - 1 {
143 match self.events.pop() {
144 Some(Event::Start {
145 kind: TOMBSTONE,
146 forward_parent: None,
147 }) => (),
148 _ => unreachable!(),
149 }
150 }
151 }
152
153 pub(super) fn precede(&mut self, pos: u32) -> u32 {
154 let new_pos = self.start();
155 match self.events[pos as usize] {
156 Event::Start {
157 ref mut forward_parent,
158 ..
159 } => {
160 *forward_parent = Some(new_pos - pos);
161 }
162 _ => unreachable!(),
163 }
164 new_pos
165 }
166
167 fn event(&mut self, event: Event) {
168 self.events.push(event)
169 }
170}