From 7912189ec304b28c4df0030b5282cf3d21074154 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 31 Jul 2018 23:38:19 +0300 Subject: reorganize --- src/parser_impl/event.rs | 156 +++++++++++++++++++++++++++++++++++++++++++++++ src/parser_impl/input.rs | 71 +++++++++++++++++++++ src/parser_impl/mod.rs | 155 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 382 insertions(+) create mode 100644 src/parser_impl/event.rs create mode 100644 src/parser_impl/input.rs create mode 100644 src/parser_impl/mod.rs (limited to 'src/parser_impl') diff --git a/src/parser_impl/event.rs b/src/parser_impl/event.rs new file mode 100644 index 000000000..eb5d0a4be --- /dev/null +++ b/src/parser_impl/event.rs @@ -0,0 +1,156 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `Sink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. +use { + lexer::Token, + parser_impl::Sink, + SyntaxKind::{self, TOMBSTONE}, +}; + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// exists to allow to tweak parent-child relationships. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// + /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// + /// And the tree would look like this + /// + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// + /// See also `CompletedMarker::precede`. + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + msg: String, + }, +} + +pub(super) fn process(builder: &mut impl Sink, tokens: &[Token], events: Vec) { + let mut idx = 0; + + let mut holes = Vec::new(); + let mut forward_parents = Vec::new(); + + for (i, event) in events.iter().enumerate() { + if holes.last() == Some(&i) { + holes.pop(); + continue; + } + + match event { + &Event::Start { + kind: TOMBSTONE, .. + } => (), + + &Event::Start { .. } => { + forward_parents.clear(); + let mut idx = i; + loop { + let (kind, fwd) = match events[idx] { + Event::Start { + kind, + forward_parent, + } => (kind, forward_parent), + _ => unreachable!(), + }; + forward_parents.push((idx, kind)); + if let Some(fwd) = fwd { + idx += fwd as usize; + } else { + break; + } + } + for &(idx, kind) in forward_parents.iter().into_iter().rev() { + builder.start_internal(kind); + holes.push(idx); + } + holes.pop(); + } + &Event::Finish => { + while idx < tokens.len() { + let token = tokens[idx]; + if token.kind.is_trivia() { + idx += 1; + builder.leaf(token.kind, token.len); + } else { + break; + } + } + builder.finish_internal() + } + &Event::Token { + kind, + mut n_raw_tokens, + } => { + // FIXME: currently, we attach whitespace to some random node + // this should be done in a sensible manner instead + loop { + let token = tokens[idx]; + if !token.kind.is_trivia() { + break; + } + builder.leaf(token.kind, token.len); + idx += 1 + } + let mut len = 0.into(); + for _ in 0..n_raw_tokens { + len += tokens[idx].len; + idx += 1; + } + builder.leaf(kind, len); + } + &Event::Error { ref msg } => builder.error(msg.clone()), + } + } +} diff --git a/src/parser_impl/input.rs b/src/parser_impl/input.rs new file mode 100644 index 000000000..db76364b2 --- /dev/null +++ b/src/parser_impl/input.rs @@ -0,0 +1,71 @@ +use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit}; + +use std::ops::{Add, AddAssign}; + +pub(crate) struct ParserInput<'t> { + text: &'t str, + start_offsets: Vec, + tokens: Vec, // non-whitespace tokens +} + +impl<'t> ParserInput<'t> { + pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = 0.into(); + for &token in raw_tokens.iter() { + if !token.kind.is_trivia() { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + ParserInput { + text, + start_offsets, + tokens, + } + } + + pub fn kind(&self, pos: InputPosition) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + + #[allow(unused)] + pub fn text(&self, pos: InputPosition) -> &'t str { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return ""; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + &self.text[range] + } +} + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub(crate) struct InputPosition(u32); + +impl InputPosition { + pub fn new() -> Self { + InputPosition(0) + } +} + +impl Add for InputPosition { + type Output = InputPosition; + + fn add(self, rhs: u32) -> InputPosition { + InputPosition(self.0 + rhs) + } +} + +impl AddAssign for InputPosition { + fn add_assign(&mut self, rhs: u32) { + self.0 += rhs + } +} diff --git a/src/parser_impl/mod.rs b/src/parser_impl/mod.rs new file mode 100644 index 000000000..b58094be3 --- /dev/null +++ b/src/parser_impl/mod.rs @@ -0,0 +1,155 @@ +mod event; +mod input; + +use { + grammar, + lexer::Token, + parser_api::Parser, + parser_impl::{ + event::{process, Event}, + input::{InputPosition, ParserInput}, + }, + TextUnit, +}; + +use SyntaxKind::{self, EOF, TOMBSTONE}; + +pub(crate) trait Sink { + type Tree; + + fn new(text: String) -> Self; + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); + fn start_internal(&mut self, kind: SyntaxKind); + fn finish_internal(&mut self); + fn error(&mut self, err: String); + fn finish(self) -> Self::Tree; +} + +/// Parse a sequence of tokens into the representative node tree +pub(crate) fn parse(text: String, tokens: &[Token]) -> S::Tree { + let events = { + let input = input::ParserInput::new(&text, tokens); + let parser_impl = ParserImpl::new(&input); + let mut parser_api = Parser(parser_impl); + grammar::file(&mut parser_api); + parser_api.0.into_events() + }; + let mut sink = S::new(text); + process(&mut sink, tokens, events); + sink.finish() +} + +/// Implementation details of `Parser`, extracted +/// to a separate struct in order not to pollute +/// the public API of the `Parser`. +pub(crate) struct ParserImpl<'t> { + inp: &'t ParserInput<'t>, + + pos: InputPosition, + events: Vec, +} + +impl<'t> ParserImpl<'t> { + pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { + ParserImpl { + inp, + + pos: InputPosition::new(), + events: Vec::new(), + } + } + + pub(crate) fn into_events(self) -> Vec { + assert_eq!(self.nth(0), EOF); + self.events + } + + pub(super) fn nth(&self, n: u32) -> SyntaxKind { + self.inp.kind(self.pos + n) + } + + pub(super) fn at_kw(&self, t: &str) -> bool { + self.inp.text(self.pos) == t + } + + pub(super) fn start(&mut self) -> u32 { + let pos = self.events.len() as u32; + self.event(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }); + pos + } + + pub(super) fn bump(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind); + } + + pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind); + } + + fn do_bump(&mut self, kind: SyntaxKind) { + self.pos += 1; + self.event(Event::Token { + kind, + n_raw_tokens: 1, + }); + } + + pub(super) fn error(&mut self, msg: String) { + self.event(Event::Error { msg }) + } + + pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { + match self.events[pos as usize] { + Event::Start { + kind: ref mut slot, .. + } => { + *slot = kind; + } + _ => unreachable!(), + } + self.event(Event::Finish); + } + + pub(super) fn abandon(&mut self, pos: u32) { + let idx = pos as usize; + if idx == self.events.len() - 1 { + match self.events.pop() { + Some(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }) => (), + _ => unreachable!(), + } + } + } + + pub(super) fn precede(&mut self, pos: u32) -> u32 { + let new_pos = self.start(); + match self.events[pos as usize] { + Event::Start { + ref mut forward_parent, + .. + } => { + *forward_parent = Some(new_pos - pos); + } + _ => unreachable!(), + } + new_pos + } + + fn event(&mut self, event: Event) { + self.events.push(event) + } +} -- cgit v1.2.3