From b5021411a84822cb3f1e3aeffad9550dd15bdeb6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 16 Sep 2018 12:54:24 +0300 Subject: rename all things --- crates/ra_syntax/src/parser_impl/event.rs | 154 ++++++++++++++++++++++++ crates/ra_syntax/src/parser_impl/input.rs | 86 +++++++++++++ crates/ra_syntax/src/parser_impl/mod.rs | 194 ++++++++++++++++++++++++++++++ 3 files changed, 434 insertions(+) create mode 100644 crates/ra_syntax/src/parser_impl/event.rs create mode 100644 crates/ra_syntax/src/parser_impl/input.rs create mode 100644 crates/ra_syntax/src/parser_impl/mod.rs (limited to 'crates/ra_syntax/src/parser_impl') diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs new file mode 100644 index 000000000..9fd56b996 --- /dev/null +++ b/crates/ra_syntax/src/parser_impl/event.rs @@ -0,0 +1,154 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `Sink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. +use std::mem; +use { + lexer::Token, + parser_impl::Sink, + SyntaxKind::{self, TOMBSTONE}, +}; + + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// exists to allow to tweak parent-child relationships. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// + /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// + /// And the tree would look like this + /// + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// + /// See also `CompletedMarker::precede`. + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + msg: String, + }, +} + + +pub(super) fn process<'a, S: Sink<'a>>(builder: &mut S, tokens: &[Token], mut events: Vec) { + fn tombstone() -> Event { + Event::Start { kind: TOMBSTONE, forward_parent: None } + } + let eat_ws = |idx: &mut usize, builder: &mut S| { + while let Some(token) = tokens.get(*idx) { + if !token.kind.is_trivia() { + break; + } + builder.leaf(token.kind, token.len); + *idx += 1 + } + }; + + let events: &mut [Event] = &mut events; + let mut depth = 0; + let mut forward_parents = Vec::new(); + let mut next_tok_idx = 0; + for i in 0..events.len() { + match mem::replace(&mut events[i], tombstone()) { + Event::Start { + kind: TOMBSTONE, .. + } => (), + + Event::Start { kind, forward_parent } => { + forward_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + fp = match mem::replace(&mut events[idx], tombstone()) { + Event::Start { + kind, + forward_parent, + } => { + forward_parents.push(kind); + forward_parent + }, + _ => unreachable!(), + }; + } + for kind in forward_parents.drain(..).rev() { + if depth > 0 { + eat_ws(&mut next_tok_idx, builder); + } + depth += 1; + builder.start_internal(kind); + } + } + Event::Finish => { + depth -= 1; + if depth == 0 { + eat_ws(&mut next_tok_idx, builder); + } + + builder.finish_internal(); + } + Event::Token { + kind, + mut n_raw_tokens, + } => { + eat_ws(&mut next_tok_idx, builder); + let mut len = 0.into(); + for _ in 0..n_raw_tokens { + len += tokens[next_tok_idx].len; + next_tok_idx += 1; + } + builder.leaf(kind, len); + } + Event::Error { msg } => builder.error(msg), + } + } +} diff --git a/crates/ra_syntax/src/parser_impl/input.rs b/crates/ra_syntax/src/parser_impl/input.rs new file mode 100644 index 000000000..c0fe4d488 --- /dev/null +++ b/crates/ra_syntax/src/parser_impl/input.rs @@ -0,0 +1,86 @@ +use {lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit}; + +use std::ops::{Add, AddAssign}; + +pub(crate) struct ParserInput<'t> { + text: &'t str, + start_offsets: Vec, + tokens: Vec, // non-whitespace tokens +} + +impl<'t> ParserInput<'t> { + pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = 0.into(); + for &token in raw_tokens.iter() { + if !token.kind.is_trivia() { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + ParserInput { + text, + start_offsets, + tokens, + } + } + + pub fn kind(&self, pos: InputPosition) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + + pub fn len(&self, pos: InputPosition) -> TextUnit { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return 0.into(); + } + self.tokens[idx].len + } + + pub fn start(&self, pos: InputPosition) -> TextUnit { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return 0.into(); + } + self.start_offsets[idx] + } + + pub fn text(&self, pos: InputPosition) -> &'t str { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return ""; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + &self.text[range] + } +} + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub(crate) struct InputPosition(u32); + +impl InputPosition { + pub fn new() -> Self { + InputPosition(0) + } +} + +impl Add for InputPosition { + type Output = InputPosition; + + fn add(self, rhs: u32) -> InputPosition { + InputPosition(self.0 + rhs) + } +} + +impl AddAssign for InputPosition { + fn add_assign(&mut self, rhs: u32) { + self.0 += rhs + } +} diff --git a/crates/ra_syntax/src/parser_impl/mod.rs b/crates/ra_syntax/src/parser_impl/mod.rs new file mode 100644 index 000000000..b343b404f --- /dev/null +++ b/crates/ra_syntax/src/parser_impl/mod.rs @@ -0,0 +1,194 @@ +mod event; +mod input; + +use std::cell::Cell; + +use { + lexer::Token, + parser_api::Parser, + parser_impl::{ + event::{process, Event}, + input::{InputPosition, ParserInput}, + }, + TextUnit, +}; + +use SyntaxKind::{self, EOF, TOMBSTONE}; + +pub(crate) trait Sink<'a> { + type Tree; + + fn new(text: &'a str) -> Self; + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit); + fn start_internal(&mut self, kind: SyntaxKind); + fn finish_internal(&mut self); + fn error(&mut self, err: String); + fn finish(self) -> Self::Tree; +} + +/// Parse a sequence of tokens into the representative node tree +pub(crate) fn parse_with<'a, S: Sink<'a>>( + text: &'a str, + tokens: &[Token], + parser: fn(&mut Parser), +) -> S::Tree { + let events = { + let input = input::ParserInput::new(text, tokens); + let parser_impl = ParserImpl::new(&input); + let mut parser_api = Parser(parser_impl); + parser(&mut parser_api); + parser_api.0.into_events() + }; + let mut sink = S::new(text); + process(&mut sink, tokens, events); + sink.finish() +} + +/// Implementation details of `Parser`, extracted +/// to a separate struct in order not to pollute +/// the public API of the `Parser`. +pub(crate) struct ParserImpl<'t> { + inp: &'t ParserInput<'t>, + + pos: InputPosition, + events: Vec, + steps: Cell, +} + +impl<'t> ParserImpl<'t> { + pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { + ParserImpl { + inp, + + pos: InputPosition::new(), + events: Vec::new(), + steps: Cell::new(0), + } + } + + pub(crate) fn into_events(self) -> Vec { + assert_eq!(self.nth(0), EOF); + self.events + } + + pub(super) fn next2(&self) -> Option<(SyntaxKind, SyntaxKind)> { + let c1 = self.inp.kind(self.pos); + let c2 = self.inp.kind(self.pos + 1); + if self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos) { + Some((c1, c2)) + } else { + None + } + } + + pub(super) fn next3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { + let c1 = self.inp.kind(self.pos); + let c2 = self.inp.kind(self.pos + 1); + let c3 = self.inp.kind(self.pos + 2); + if self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos) + && self.inp.start(self.pos + 2) == self.inp.start(self.pos + 1) + self.inp.len(self.pos + 1){ + Some((c1, c2, c3)) + } else { + None + } + } + + pub(super) fn nth(&self, n: u32) -> SyntaxKind { + let steps = self.steps.get(); + if steps > 10_000_000 { + panic!("the parser seems stuck"); + } + self.steps.set(steps + 1); + + self.inp.kind(self.pos + n) + } + + pub(super) fn at_kw(&self, t: &str) -> bool { + self.inp.text(self.pos) == t + } + + pub(super) fn start(&mut self) -> u32 { + let pos = self.events.len() as u32; + self.event(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }); + pos + } + + pub(super) fn bump(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); + } + + pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind, 1); + } + + pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.do_bump(kind, n); + } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.pos += u32::from(n_raw_tokens); + self.event(Event::Token { + kind, + n_raw_tokens, + }); + } + + pub(super) fn error(&mut self, msg: String) { + self.event(Event::Error { msg }) + } + + pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { + match self.events[pos as usize] { + Event::Start { + kind: ref mut slot, .. + } => { + *slot = kind; + } + _ => unreachable!(), + } + self.event(Event::Finish); + } + + pub(super) fn abandon(&mut self, pos: u32) { + let idx = pos as usize; + if idx == self.events.len() - 1 { + match self.events.pop() { + Some(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }) => (), + _ => unreachable!(), + } + } + } + + pub(super) fn precede(&mut self, pos: u32) -> u32 { + let new_pos = self.start(); + match self.events[pos as usize] { + Event::Start { + ref mut forward_parent, + .. + } => { + *forward_parent = Some(new_pos - pos); + } + _ => unreachable!(), + } + new_pos + } + + fn event(&mut self, event: Event) { + self.events.push(event) + } +} -- cgit v1.2.3