From 50a02eb3593591a02677e1b56f24d7ff0459b9d0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 12 Aug 2020 17:06:49 +0200 Subject: Rename ra_parser -> parser --- crates/parser/src/parser.rs | 350 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 crates/parser/src/parser.rs (limited to 'crates/parser/src/parser.rs') diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs new file mode 100644 index 000000000..d2487acc3 --- /dev/null +++ b/crates/parser/src/parser.rs @@ -0,0 +1,350 @@ +//! FIXME: write short doc here + +use std::cell::Cell; + +use drop_bomb::DropBomb; + +use crate::{ + event::Event, + ParseError, + SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, + TokenSet, TokenSource, T, +}; + +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the `grammar` module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. +pub(crate) struct Parser<'t> { + token_source: &'t mut dyn TokenSource, + events: Vec, + steps: Cell, +} + +impl<'t> Parser<'t> { + pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> { + Parser { token_source, events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec { + self.events + } + + /// Returns the kind of the current token. + /// If parser has already reached the end of input, + /// the special `EOF` kind is returned. + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + /// Lookahead operation: returns the kind of the next nth + /// token. + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + assert!(n <= 3); + + let steps = self.steps.get(); + assert!(steps <= 10_000_000, "the parser seems stuck"); + self.steps.set(steps + 1); + + self.token_source.lookahead_nth(n).kind + } + + /// Checks if the current token is `kind`. + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.nth_at(0, kind) + } + + pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { + match kind { + T![-=] => self.at_composite2(n, T![-], T![=]), + T![->] => self.at_composite2(n, T![-], T![>]), + T![::] => self.at_composite2(n, T![:], T![:]), + T![!=] => self.at_composite2(n, T![!], T![=]), + T![..] => self.at_composite2(n, T![.], T![.]), + T![*=] => self.at_composite2(n, T![*], T![=]), + T![/=] => self.at_composite2(n, T![/], T![=]), + T![&&] => self.at_composite2(n, T![&], T![&]), + T![&=] => self.at_composite2(n, T![&], T![=]), + T![%=] => self.at_composite2(n, T![%], T![=]), + T![^=] => self.at_composite2(n, T![^], T![=]), + T![+=] => self.at_composite2(n, T![+], T![=]), + T![<<] => self.at_composite2(n, T![<], T![<]), + T![<=] => self.at_composite2(n, T![<], T![=]), + T![==] => self.at_composite2(n, T![=], T![=]), + T![=>] => self.at_composite2(n, T![=], T![>]), + T![>=] => self.at_composite2(n, T![>], T![=]), + T![>>] => self.at_composite2(n, T![>], T![>]), + T![|=] => self.at_composite2(n, T![|], T![=]), + T![||] => self.at_composite2(n, T![|], T![|]), + + T![...] => self.at_composite3(n, T![.], T![.], T![.]), + T![..=] => self.at_composite3(n, T![.], T![.], T![=]), + T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), + T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), + + _ => self.token_source.lookahead_nth(n).kind == kind, + } + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + let n_raw_tokens = match kind { + T![-=] + | T![->] + | T![::] + | T![!=] + | T![..] + | T![*=] + | T![/=] + | T![&&] + | T![&=] + | T![%=] + | T![^=] + | T![+=] + | T![<<] + | T![<=] + | T![==] + | T![=>] + | T![>=] + | T![>>] + | T![|=] + | T![||] => 2, + + T![...] | T![..=] | T![<<=] | T![>>=] => 3, + _ => 1, + }; + self.do_bump(kind, n_raw_tokens); + true + } + + fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { + let t1 = self.token_source.lookahead_nth(n); + if t1.kind != k1 || !t1.is_jointed_to_next { + return false; + } + let t2 = self.token_source.lookahead_nth(n + 1); + t2.kind == k2 + } + + fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { + let t1 = self.token_source.lookahead_nth(n); + if t1.kind != k1 || !t1.is_jointed_to_next { + return false; + } + let t2 = self.token_source.lookahead_nth(n + 1); + if t2.kind != k2 || !t2.is_jointed_to_next { + return false; + } + let t3 = self.token_source.lookahead_nth(n + 2); + t3.kind == k3 + } + + /// Checks if the current token is in `kinds`. + pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { + kinds.contains(self.current()) + } + + /// Checks if the current token is contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(kw) + } + + /// Starts a new node in the syntax tree. All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. + pub(crate) fn start(&mut self) -> Marker { + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) + } + + /// Consume the next token if `kind` matches. + pub(crate) fn bump(&mut self, kind: SyntaxKind) { + assert!(self.eat(kind)); + } + + /// Advances the parser by one token + pub(crate) fn bump_any(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1) + } + + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. For example, the lexer creates an `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and keyword is what ends up in the + /// final tree. + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // FIXME: panic!? + return; + } + self.do_bump(kind, 1); + } + + /// Emit error with the `message` + /// FIXME: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. + pub(crate) fn error>(&mut self, message: T) { + let msg = ParseError(Box::new(message.into())); + self.push_event(Event::Error { msg }) + } + + /// Consume the next token if it is `kind` or emit an error + /// otherwise. + pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. + pub(crate) fn err_and_bump(&mut self, message: &str) { + self.err_recover(message, TokenSet::EMPTY); + } + + /// Create an error node and consume the next token. + pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { + match self.current() { + T!['{'] | T!['}'] => { + self.error(message); + return; + } + _ => (), + } + + if self.at_ts(recovery) { + self.error(message); + return; + } + + let m = self.start(); + self.error(message); + self.bump_any(); + m.complete(self, ERROR); + } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + for _ in 0..n_raw_tokens { + self.token_source.bump(); + } + + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event) + } +} + +/// See `Parser::start`. +pub(crate) struct Marker { + pos: u32, + bomb: DropBomb, +} + +impl Marker { + fn new(pos: u32) -> Marker { + Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } + } + + /// Finishes the syntax tree node and assigns `kind` to it, + /// and mark the create a `CompletedMarker` for possible future + /// operation like `.precede()` to deal with forward_parent. + pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + let idx = self.pos as usize; + match &mut p.events[idx] { + Event::Start { kind: slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + let finish_pos = p.events.len() as u32; + p.push_event(Event::Finish); + CompletedMarker::new(self.pos, finish_pos, kind) + } + + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. + pub(crate) fn abandon(mut self, p: &mut Parser) { + self.bomb.defuse(); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } + } +} + +pub(crate) struct CompletedMarker { + start_pos: u32, + finish_pos: u32, + kind: SyntaxKind, +} + +impl CompletedMarker { + fn new(start_pos: u32, finish_pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker { start_pos, finish_pos, kind } + } + + /// This method allows to create a new node which starts + /// *before* the current one. That is, parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows to do exactly + /// that. See also docs about `forward_parent` in `Event::Start`. + /// + /// Given completed events `[START, FINISH]` and its corresponding + /// `CompletedMarker(pos: 0, _)`. + /// Append a new `START` events as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent with saving its relative + /// distance to `NEWSTART` into forward_parent(=2 in this case); + pub(crate) fn precede(self, p: &mut Parser) -> Marker { + let new_pos = p.start(); + let idx = self.start_pos as usize; + match &mut p.events[idx] { + Event::Start { forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.start_pos); + } + _ => unreachable!(), + } + new_pos + } + + /// Undo this completion and turns into a `Marker` + pub(crate) fn undo_completion(self, p: &mut Parser) -> Marker { + let start_idx = self.start_pos as usize; + let finish_idx = self.finish_pos as usize; + match &mut p.events[start_idx] { + Event::Start { kind, forward_parent: None } => *kind = TOMBSTONE, + _ => unreachable!(), + } + match &mut p.events[finish_idx] { + slot @ Event::Finish => *slot = Event::tombstone(), + _ => unreachable!(), + } + Marker::new(self.start_pos) + } + + pub(crate) fn kind(&self) -> SyntaxKind { + self.kind + } +} -- cgit v1.2.3