From 61992dc1cd4956038e3c15439c1203f21e05af06 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 23:05:59 +0300 Subject: simplify --- crates/ra_syntax/src/parsing.rs | 4 +- crates/ra_syntax/src/parsing/grammar.rs | 2 +- crates/ra_syntax/src/parsing/parser.rs | 271 ++++++++++++++++++++++++++++ crates/ra_syntax/src/parsing/parser_api.rs | 271 ---------------------------- crates/ra_syntax/src/parsing/parser_impl.rs | 25 --- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- 6 files changed, 275 insertions(+), 300 deletions(-) create mode 100644 crates/ra_syntax/src/parsing/parser.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_api.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl.rs diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 5de6ff8c1..941ec501e 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -4,7 +4,7 @@ mod builder; mod lexer; mod event; mod input; -mod parser_api; +mod parser; mod grammar; mod reparsing; @@ -14,7 +14,7 @@ use crate::{ builder::GreenBuilder, input::ParserInput, event::EventProcessor, - parser_api::Parser, + parser::Parser, }, syntax_node::GreenNode, }; diff --git a/crates/ra_syntax/src/parsing/grammar.rs b/crates/ra_syntax/src/parsing/grammar.rs index bcdcd9f57..7ca9c223c 100644 --- a/crates/ra_syntax/src/parsing/grammar.rs +++ b/crates/ra_syntax/src/parsing/grammar.rs @@ -41,7 +41,7 @@ use crate::{ SyntaxKind::{self, *}, parsing::{ token_set::TokenSet, - parser_api::{CompletedMarker, Marker, Parser} + parser::{CompletedMarker, Marker, Parser} }, }; diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs new file mode 100644 index 000000000..988fcb518 --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser.rs @@ -0,0 +1,271 @@ +use std::cell::Cell; + +use drop_bomb::DropBomb; + +use crate::{ + syntax_error::ParseError, + SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, + parsing::{ + TokenSource, + token_set::TokenSet, + event::Event, + }, +}; + +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the `grammar` module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. +pub(crate) struct Parser<'t> { + token_source: &'t dyn TokenSource, + token_pos: usize, + events: Vec, + steps: Cell, +} + +impl<'t> Parser<'t> { + pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { + Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec { + self.events + } + + /// Returns the kind of the current token. + /// If parser has already reached the end of input, + /// the special `EOF` kind is returned. + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + /// Returns the kinds of the current two tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `>>`. 
+ pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + if self.token_source.is_token_joint_to_next(self.token_pos) { + Some((c1, c2)) + } else { + None + } + } + + /// Returns the kinds of the current three tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `=>>`. + pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + let c3 = self.token_source.token_kind(self.token_pos + 2); + if self.token_source.is_token_joint_to_next(self.token_pos) + && self.token_source.is_token_joint_to_next(self.token_pos + 1) + { + Some((c1, c2, c3)) + } else { + None + } + } + + /// Lookahead operation: returns the kind of the next nth + /// token. + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + let steps = self.steps.get(); + assert!(steps <= 10_000_000, "the parser seems stuck"); + self.steps.set(steps + 1); + self.token_source.token_kind(self.token_pos + n) + } + + /// Checks if the current token is `kind`. + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.current() == kind + } + + /// Checks if the current token is in `kinds`. + pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { + kinds.contains(self.current()) + } + + /// Checks if the current token is contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.token_pos, kw) + } + + /// Starts a new node in the syntax tree. All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. + pub(crate) fn start(&mut self) -> Marker { + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) + } + + /// Advances the parser by one token unconditionally. + pub(crate) fn bump(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); + } + + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. For example, the lexer creates an `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and keyword is what ends up in the + /// final tree. + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind, 1); + } + + /// Advances the parser by `n` tokens, remapping its kind. + /// This is useful to create compound tokens from parts. For + /// example, an `<<` token is two consecutive remapped `<` tokens + pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.do_bump(kind, n); + } + + /// Emit error with the `message` + /// TODO: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. + pub(crate) fn error>(&mut self, message: T) { + let msg = ParseError(message.into()); + self.push_event(Event::Error { msg }) + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + self.bump(); + true + } + + /// Consume the next token if it is `kind` or emit an error + /// otherwise. 
+ pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. + pub(crate) fn err_and_bump(&mut self, message: &str) { + self.err_recover(message, TokenSet::empty()); + } + + /// Create an error node and consume the next token. + pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { + if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) { + self.error(message); + } else { + let m = self.start(); + self.error(message); + self.bump(); + m.complete(self, ERROR); + }; + } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.token_pos += usize::from(n_raw_tokens); + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event) + } +} + +/// See `Parser::start`. +pub(crate) struct Marker { + pos: u32, + bomb: DropBomb, +} + +impl Marker { + fn new(pos: u32) -> Marker { + Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } + } + + /// Finishes the syntax tree node and assigns `kind` to it, + /// and mark the create a `CompletedMarker` for possible future + /// operation like `.precede()` to deal with forward_parent. + pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + let idx = self.pos as usize; + match p.events[idx] { + Event::Start { kind: ref mut slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + p.push_event(Event::Finish); + CompletedMarker::new(self.pos, kind) + } + + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. + pub(crate) fn abandon(mut self, p: &mut Parser) { + self.bomb.defuse(); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } + } +} + +pub(crate) struct CompletedMarker(u32, SyntaxKind); + +impl CompletedMarker { + fn new(pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker(pos, kind) + } + + /// This method allows to create a new node which starts + /// *before* the current one. That is, parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows to do exactly + /// that. See also docs about `forward_parent` in `Event::Start`. + /// + /// Given completed events `[START, FINISH]` and its corresponding + /// `CompletedMarker(pos: 0, _)`. + /// Append a new `START` events as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent with saving its relative + /// distance to `NEWSTART` into forward_parent(=2 in this case); + pub(crate) fn precede(self, p: &mut Parser) -> Marker { + let new_pos = p.start(); + let idx = self.0 as usize; + match p.events[idx] { + Event::Start { ref mut forward_parent, .. 
} => { + *forward_parent = Some(new_pos.pos - self.0); + } + _ => unreachable!(), + } + new_pos + } + + pub(crate) fn kind(&self) -> SyntaxKind { + self.1 + } +} diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs deleted file mode 100644 index 988fcb518..000000000 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ /dev/null @@ -1,271 +0,0 @@ -use std::cell::Cell; - -use drop_bomb::DropBomb; - -use crate::{ - syntax_error::ParseError, - SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, - parsing::{ - TokenSource, - token_set::TokenSet, - event::Event, - }, -}; - -/// `Parser` struct provides the low-level API for -/// navigating through the stream of tokens and -/// constructing the parse tree. The actual parsing -/// happens in the `grammar` module. -/// -/// However, the result of this `Parser` is not a real -/// tree, but rather a flat stream of events of the form -/// "start expression, consume number literal, -/// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t> { - token_source: &'t dyn TokenSource, - token_pos: usize, - events: Vec, - steps: Cell, -} - -impl<'t> Parser<'t> { - pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { - Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } - } - - pub(crate) fn finish(self) -> Vec { - self.events - } - - /// Returns the kind of the current token. - /// If parser has already reached the end of input, - /// the special `EOF` kind is returned. - pub(crate) fn current(&self) -> SyntaxKind { - self.nth(0) - } - - /// Returns the kinds of the current two tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `>>`. - pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.token_pos); - let c2 = self.token_source.token_kind(self.token_pos + 1); - if self.token_source.is_token_joint_to_next(self.token_pos) { - Some((c1, c2)) - } else { - None - } - } - - /// Returns the kinds of the current three tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `=>>`. - pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.token_pos); - let c2 = self.token_source.token_kind(self.token_pos + 1); - let c3 = self.token_source.token_kind(self.token_pos + 2); - if self.token_source.is_token_joint_to_next(self.token_pos) - && self.token_source.is_token_joint_to_next(self.token_pos + 1) - { - Some((c1, c2, c3)) - } else { - None - } - } - - /// Lookahead operation: returns the kind of the next nth - /// token. - pub(crate) fn nth(&self, n: usize) -> SyntaxKind { - let steps = self.steps.get(); - assert!(steps <= 10_000_000, "the parser seems stuck"); - self.steps.set(steps + 1); - self.token_source.token_kind(self.token_pos + n) - } - - /// Checks if the current token is `kind`. - pub(crate) fn at(&self, kind: SyntaxKind) -> bool { - self.current() == kind - } - - /// Checks if the current token is in `kinds`. - pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { - kinds.contains(self.current()) - } - - /// Checks if the current token is contextual keyword with text `t`. - pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.token_pos, kw) - } - - /// Starts a new node in the syntax tree. 
All nodes and tokens - /// consumed between the `start` and the corresponding `Marker::complete` - /// belong to the same node. - pub(crate) fn start(&mut self) -> Marker { - let pos = self.events.len() as u32; - self.push_event(Event::tombstone()); - Marker::new(pos) - } - - /// Advances the parser by one token unconditionally. - pub(crate) fn bump(&mut self) { - let kind = self.nth(0); - if kind == EOF { - return; - } - self.do_bump(kind, 1); - } - - /// Advances the parser by one token, remapping its kind. - /// This is useful to create contextual keywords from - /// identifiers. For example, the lexer creates an `union` - /// *identifier* token, but the parser remaps it to the - /// `union` keyword, and keyword is what ends up in the - /// final tree. - pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { - if self.nth(0) == EOF { - // TODO: panic!? - return; - } - self.do_bump(kind, 1); - } - - /// Advances the parser by `n` tokens, remapping its kind. - /// This is useful to create compound tokens from parts. For - /// example, an `<<` token is two consecutive remapped `<` tokens - pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.do_bump(kind, n); - } - - /// Emit error with the `message` - /// TODO: this should be much more fancy and support - /// structured errors with spans and notes, like rustc - /// does. - pub(crate) fn error>(&mut self, message: T) { - let msg = ParseError(message.into()); - self.push_event(Event::Error { msg }) - } - - /// Consume the next token if `kind` matches. - pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { - if !self.at(kind) { - return false; - } - self.bump(); - true - } - - /// Consume the next token if it is `kind` or emit an error - /// otherwise. - pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { - if self.eat(kind) { - return true; - } - self.error(format!("expected {:?}", kind)); - false - } - - /// Create an error node and consume the next token. - pub(crate) fn err_and_bump(&mut self, message: &str) { - self.err_recover(message, TokenSet::empty()); - } - - /// Create an error node and consume the next token. - pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { - if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) { - self.error(message); - } else { - let m = self.start(); - self.error(message); - self.bump(); - m.complete(self, ERROR); - }; - } - - fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.token_pos += usize::from(n_raw_tokens); - self.push_event(Event::Token { kind, n_raw_tokens }); - } - - fn push_event(&mut self, event: Event) { - self.events.push(event) - } -} - -/// See `Parser::start`. -pub(crate) struct Marker { - pos: u32, - bomb: DropBomb, -} - -impl Marker { - fn new(pos: u32) -> Marker { - Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } - } - - /// Finishes the syntax tree node and assigns `kind` to it, - /// and mark the create a `CompletedMarker` for possible future - /// operation like `.precede()` to deal with forward_parent. - pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { - self.bomb.defuse(); - let idx = self.pos as usize; - match p.events[idx] { - Event::Start { kind: ref mut slot, .. } => { - *slot = kind; - } - _ => unreachable!(), - } - p.push_event(Event::Finish); - CompletedMarker::new(self.pos, kind) - } - - /// Abandons the syntax tree node. All its children - /// are attached to its parent instead. 
- pub(crate) fn abandon(mut self, p: &mut Parser) { - self.bomb.defuse(); - let idx = self.pos as usize; - if idx == p.events.len() - 1 { - match p.events.pop() { - Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), - _ => unreachable!(), - } - } - } -} - -pub(crate) struct CompletedMarker(u32, SyntaxKind); - -impl CompletedMarker { - fn new(pos: u32, kind: SyntaxKind) -> Self { - CompletedMarker(pos, kind) - } - - /// This method allows to create a new node which starts - /// *before* the current one. That is, parser could start - /// node `A`, then complete it, and then after parsing the - /// whole `A`, decide that it should have started some node - /// `B` before starting `A`. `precede` allows to do exactly - /// that. See also docs about `forward_parent` in `Event::Start`. - /// - /// Given completed events `[START, FINISH]` and its corresponding - /// `CompletedMarker(pos: 0, _)`. - /// Append a new `START` events as `[START, FINISH, NEWSTART]`, - /// then mark `NEWSTART` as `START`'s parent with saving its relative - /// distance to `NEWSTART` into forward_parent(=2 in this case); - pub(crate) fn precede(self, p: &mut Parser) -> Marker { - let new_pos = p.start(); - let idx = self.0 as usize; - match p.events[idx] { - Event::Start { ref mut forward_parent, .. } => { - *forward_parent = Some(new_pos.pos - self.0); - } - _ => unreachable!(), - } - new_pos - } - - pub(crate) fn kind(&self) -> SyntaxKind { - self.1 - } -} diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs deleted file mode 100644 index 6eed0e656..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub(super) mod event; -pub(super) mod input; - -use crate::parsing::{ - TreeSink, TokenSource, - lexer::Token, - parser_api::Parser, - parser_impl::event::EventProcessor, -}; - -/// Parse a sequence of tokens into the representative node tree -pub(super) fn parse_with( - sink: S, - text: &str, - tokens: &[Token], - parser: fn(&mut Parser), -) -> S::Tree { - let mut events = { - let input = input::ParserInput::new(text, tokens); - let mut parser_api = Parser::new(&input); - parser(&mut parser_api); - parser_api.finish() - }; - EventProcessor::new(sink, text, tokens, &mut events).process().finish() -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index f45326dff..674b15f9a 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -6,7 +6,7 @@ use crate::{ parsing::{ grammar, parse_with, builder::GreenBuilder, - parser_api::Parser, + parser::Parser, lexer::{tokenize, Token}, } }; -- cgit v1.2.3
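The `Parser`, `Marker`, and `CompletedMarker` types relocated by this commit are driven by the grammar in `grammar.rs`. Below is a minimal sketch of that usage, not part of the patch: the grammar functions `paren_expr`, `bin_expr`, and `atom` are hypothetical, and the `SyntaxKind` variants named here are assumed to exist in the crate; only the parser methods themselves (`start`, `at`, `bump`, `expect`, `error`, `complete`, `precede`) come from the file added above.

use crate::{
    SyntaxKind::*,
    parsing::parser::{CompletedMarker, Parser},
};

// `(expr)`: open a marker, consume tokens, then complete the marker with
// the node's kind. `expect` records an error event if `)` is missing.
fn paren_expr(p: &mut Parser) -> CompletedMarker {
    assert!(p.at(L_PAREN));
    let m = p.start();
    p.bump(); // '('
    bin_expr(p);
    p.expect(R_PAREN);
    m.complete(p, PAREN_EXPR)
}

// Left-associative `a + b + c`: once the left operand is complete and a
// `+` follows, wrap it retroactively with `precede()`, which links a new
// Start event to the old one through `forward_parent` instead of
// re-parsing the operand.
fn bin_expr(p: &mut Parser) -> CompletedMarker {
    let mut lhs = atom(p);
    while p.at(PLUS) {
        let m = lhs.precede(p);
        p.bump(); // '+'
        atom(p);
        lhs = m.complete(p, BIN_EXPR);
    }
    lhs
}

// A single operand; on an unexpected token, emit an error, consume the
// token so parsing can make progress, and wrap it in an ERROR node.
fn atom(p: &mut Parser) -> CompletedMarker {
    if p.at(L_PAREN) {
        return paren_expr(p);
    }
    let m = p.start();
    if p.at(INT_NUMBER) {
        p.bump();
        m.complete(p, LITERAL)
    } else {
        p.error("expected an expression");
        p.bump();
        m.complete(p, ERROR)
    }
}

Because the parser only records a flat `Vec<Event>`, `precede()` can re-parent an already finished node without any tree surgery; `EventProcessor` later replays the events through `GreenBuilder` to produce the final `GreenNode`.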