From 3517c175ac537b47dd3e36cc7fb1edd60b02c039 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 21:08:59 +0300 Subject: rename Sink -> TreeSink --- crates/ra_syntax/src/parsing/builder.rs | 4 ++-- crates/ra_syntax/src/parsing/parser_impl.rs | 4 ++-- crates/ra_syntax/src/parsing/parser_impl/event.rs | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 9090c60c2..118f43b2c 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,5 +1,5 @@ use crate::{ - parsing::parser_impl::Sink, + parsing::parser_impl::TreeSink, syntax_node::{GreenNode, RaTypes}, SmolStr, SyntaxKind, SyntaxError, }; @@ -17,7 +17,7 @@ impl GreenBuilder { } } -impl Sink for GreenBuilder { +impl TreeSink for GreenBuilder { type Tree = (GreenNode, Vec); fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 8cce1ab01..02baed76b 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -18,7 +18,7 @@ use crate::{ use crate::SyntaxKind::{self, EOF, TOMBSTONE}; -pub(super) trait Sink { +pub(super) trait TreeSink { type Tree; /// Adds new leaf to the current branch. @@ -40,7 +40,7 @@ pub(super) trait Sink { } /// Parse a sequence of tokens into the representative node tree -pub(super) fn parse_with( +pub(super) fn parse_with( sink: S, text: &str, tokens: &[Token], diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs index 2ddbdd34d..9663fba35 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs @@ -3,7 +3,7 @@ //! parser, so as to allow to evolve the tree representation //! and the parser algorithm independently. //! -//! The `Sink` trait is the bridge between the parser and the +//! The `TreeSink` trait is the bridge between the parser and the //! tree builder: the parser produces a stream of events like //! `start node`, `finish node`, and `FileBuilder` converts //! this stream to a real tree. @@ -20,7 +20,7 @@ use crate::{ }, parsing::{ lexer::Token, - parser_impl::Sink, + parser_impl::TreeSink, }, }; @@ -93,7 +93,7 @@ impl Event { } } -pub(super) struct EventProcessor<'a, S: Sink> { +pub(super) struct EventProcessor<'a, S: TreeSink> { sink: S, text_pos: TextUnit, text: &'a str, @@ -102,7 +102,7 @@ pub(super) struct EventProcessor<'a, S: Sink> { events: &'a mut [Event], } -impl<'a, S: Sink> EventProcessor<'a, S> { +impl<'a, S: TreeSink> EventProcessor<'a, S> { pub(super) fn new( sink: S, text: &'a str, -- cgit v1.2.3 From 0c81b9deeed81bfb2cf8142af9d748317d5d71a1 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 21:50:07 +0300 Subject: route parsing via TokenSource trait --- crates/ra_syntax/src/parsing/parser_api.rs | 4 +- crates/ra_syntax/src/parsing/parser_impl.rs | 50 ++++++++-------- crates/ra_syntax/src/parsing/parser_impl/input.rs | 69 ++++++++++------------- 3 files changed, 59 insertions(+), 64 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 781c407de..813ae494c 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -17,7 +17,9 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); +pub(crate) struct Parser<'t>( + pub(super) ParserImpl>, +); impl<'t> Parser<'t> { /// Returns the kind of the current token. diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 02baed76b..c0d2b6ec1 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -1,5 +1,5 @@ mod event; -mod input; +pub(crate) mod input; use std::cell::Cell; @@ -11,7 +11,7 @@ use crate::{ parser_api::Parser, parser_impl::{ event::{Event, EventProcessor}, - input::{InputPosition, ParserInput}, + input::InputPosition, }, }, }; @@ -39,6 +39,12 @@ pub(super) trait TreeSink { fn finish(self) -> Self::Tree; } +pub(super) trait TokenSource { + fn token_kind(&self, pos: InputPosition) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: InputPosition) -> bool; + fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool; +} + /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with( sink: S, @@ -48,7 +54,7 @@ pub(super) fn parse_with( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(&input); + let parser_impl = ParserImpl::new(input); let mut parser_api = Parser(parser_impl); parser(&mut parser_api); parser_api.0.into_events() @@ -59,17 +65,17 @@ pub(super) fn parse_with( /// Implementation details of `Parser`, extracted /// to a separate struct in order not to pollute /// the public API of the `Parser`. -pub(super) struct ParserImpl<'t> { - parser_input: &'t ParserInput<'t>, +pub(super) struct ParserImpl { + token_source: S, pos: InputPosition, events: Vec, steps: Cell, } -impl<'t> ParserImpl<'t> { - fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { +impl ParserImpl { + fn new(token_source: S) -> ParserImpl { ParserImpl { - parser_input: inp, + token_source, pos: InputPosition::new(), events: Vec::new(), steps: Cell::new(0), @@ -82,11 +88,9 @@ impl<'t> ParserImpl<'t> { } pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.parser_input.kind(self.pos); - let c2 = self.parser_input.kind(self.pos + 1); - if self.parser_input.token_start_at(self.pos + 1) - == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) - { + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + if self.token_source.is_token_joint_to_next(self.pos) { Some((c1, c2)) } else { None @@ -94,14 +98,11 @@ impl<'t> ParserImpl<'t> { } pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.parser_input.kind(self.pos); - let c2 = self.parser_input.kind(self.pos + 1); - let c3 = self.parser_input.kind(self.pos + 2); - if self.parser_input.token_start_at(self.pos + 1) - == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) - && self.parser_input.token_start_at(self.pos + 2) - == self.parser_input.token_start_at(self.pos + 1) - + self.parser_input.token_len(self.pos + 1) + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + let c3 = self.token_source.token_kind(self.pos + 2); + if self.token_source.is_token_joint_to_next(self.pos) + && self.token_source.is_token_joint_to_next(self.pos + 1) { Some((c1, c2, c3)) } else { @@ -114,12 +115,11 @@ impl<'t> ParserImpl<'t> { let steps = self.steps.get(); assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - - self.parser_input.kind(self.pos + n) + self.token_source.token_kind(self.pos + n) } - pub(super) fn at_kw(&self, t: &str) -> bool { - self.parser_input.token_text(self.pos) == t + pub(super) fn at_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.pos, kw) } /// Start parsing right behind the last event. diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index 275d94918..8ebbd3825 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -1,10 +1,40 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, - parsing::lexer::Token, + parsing::{ + parser_impl::TokenSource, + lexer::Token, + }, }; use std::ops::{Add, AddAssign}; +impl<'t> TokenSource for ParserInput<'t> { + fn token_kind(&self, pos: InputPosition) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + fn is_token_joint_to_next(&self, pos: InputPosition) -> bool { + let idx_curr = pos.0 as usize; + let idx_next = pos.0 as usize; + if !(idx_next < self.tokens.len()) { + return true; + } + self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + } + fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return false; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + + self.text[range] == *kw + } +} + pub(crate) struct ParserInput<'t> { text: &'t str, /// start position of each token(expect whitespace and comment) @@ -41,43 +71,6 @@ impl<'t> ParserInput<'t> { ParserInput { text, start_offsets, tokens } } - - /// Get the syntax kind of token at given input position. - pub fn kind(&self, pos: InputPosition) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return EOF; - } - self.tokens[idx].kind - } - - /// Get the length of a token at given input position. - pub fn token_len(&self, pos: InputPosition) -> TextUnit { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return 0.into(); - } - self.tokens[idx].len - } - - /// Get the start position of a taken at given input position. - pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return 0.into(); - } - self.start_offsets[idx] - } - - /// Get the raw text of a token at given input position. - pub fn token_text(&self, pos: InputPosition) -> &'t str { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return ""; - } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); - &self.text[range] - } } #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -- cgit v1.2.3 From d2bce118ae72ee5cf96b8c6ac687914cb842363c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:02:03 +0300 Subject: switch to dynamic dispatch for TokenSource Benchmarks show no difference. This is probably because we are bottlenecked on memory allocations, and we should fix that, but we are not optimizing for performance just yet. changes. Lines starting # with '#' will be ignored, and an empty message aborts the commit. # # On branch token-source # Changes to be committed: # modified: crates/ra_syntax/src/parsing/parser_api.rs # modified: crates/ra_syntax/src/parsing/parser_impl.rs # --- crates/ra_syntax/src/parsing/parser_api.rs | 6 ++---- crates/ra_syntax/src/parsing/parser_impl.rs | 10 +++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 813ae494c..aed23a6a4 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -4,7 +4,7 @@ use crate::{ SyntaxKind::{self, ERROR}, parsing::{ token_set::TokenSet, - parser_impl::ParserImpl + parser_impl::ParserImpl, }, }; @@ -17,9 +17,7 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>( - pub(super) ParserImpl>, -); +pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); impl<'t> Parser<'t> { /// Returns the kind of the current token. diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index c0d2b6ec1..96de32fc2 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -54,7 +54,7 @@ pub(super) fn parse_with( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(input); + let parser_impl = ParserImpl::new(&input); let mut parser_api = Parser(parser_impl); parser(&mut parser_api); parser_api.0.into_events() @@ -65,15 +65,15 @@ pub(super) fn parse_with( /// Implementation details of `Parser`, extracted /// to a separate struct in order not to pollute /// the public API of the `Parser`. -pub(super) struct ParserImpl { - token_source: S, +pub(super) struct ParserImpl<'a> { + token_source: &'a dyn TokenSource, pos: InputPosition, events: Vec, steps: Cell, } -impl ParserImpl { - fn new(token_source: S) -> ParserImpl { +impl<'a> ParserImpl<'a> { + fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { ParserImpl { token_source, pos: InputPosition::new(), -- cgit v1.2.3 From 2b5e336ce7172914686b33c8ac1522911366fcf0 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:19:12 +0300 Subject: move abstract traits to top --- crates/ra_syntax/src/parsing.rs | 50 ++++++++++++++++++++++- crates/ra_syntax/src/parsing/builder.rs | 2 +- crates/ra_syntax/src/parsing/parser_impl.rs | 40 +++--------------- crates/ra_syntax/src/parsing/parser_impl/input.rs | 32 ++------------- 4 files changed, 59 insertions(+), 65 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 761accd7b..6c2c5f78b 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -8,7 +8,7 @@ mod grammar; mod reparsing; use crate::{ - SyntaxError, + SyntaxError, SyntaxKind, SmolStr, parsing::builder::GreenBuilder, syntax_node::GreenNode, }; @@ -23,3 +23,51 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); (green, errors) } + +/// `TreeSink` abstracts details of a particular syntax tree implementation. +trait TreeSink { + type Tree; + + /// Adds new leaf to the current branch. + fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); + + /// Start new branch and make it current. + fn start_branch(&mut self, kind: SyntaxKind); + + /// Finish current branch and restore previous + /// branch as current. + fn finish_branch(&mut self); + + fn error(&mut self, error: SyntaxError); + + /// Complete tree building. Make sure that + /// `start_branch` and `finish_branch` calls + /// are paired! + fn finish(self) -> Self::Tree; +} + +/// `TokenSource` abstracts the source of the tokens parser operates one. +/// +/// Hopefully this will allow us to treat text and token trees in the same way! +trait TokenSource { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool; + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool; +} + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)] +pub(crate) struct TokenPos(pub u32); + +impl std::ops::Add for TokenPos { + type Output = TokenPos; + + fn add(self, rhs: u32) -> TokenPos { + TokenPos(self.0 + rhs) + } +} + +impl std::ops::AddAssign for TokenPos { + fn add_assign(&mut self, rhs: u32) { + self.0 += rhs + } +} diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 118f43b2c..a05e7f84b 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,5 +1,5 @@ use crate::{ - parsing::parser_impl::TreeSink, + parsing::TreeSink, syntax_node::{GreenNode, RaTypes}, SmolStr, SyntaxKind, SyntaxError, }; diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 96de32fc2..89439e074 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -4,47 +4,17 @@ pub(crate) mod input; use std::cell::Cell; use crate::{ - SmolStr, - syntax_error::{ParseError, SyntaxError}, + syntax_error::ParseError, parsing::{ + TreeSink, TokenSource, TokenPos, lexer::Token, parser_api::Parser, - parser_impl::{ - event::{Event, EventProcessor}, - input::InputPosition, - }, + parser_impl::event::{Event, EventProcessor}, }, }; use crate::SyntaxKind::{self, EOF, TOMBSTONE}; -pub(super) trait TreeSink { - type Tree; - - /// Adds new leaf to the current branch. - fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); - - /// Start new branch and make it current. - fn start_branch(&mut self, kind: SyntaxKind); - - /// Finish current branch and restore previous - /// branch as current. - fn finish_branch(&mut self); - - fn error(&mut self, error: SyntaxError); - - /// Complete tree building. Make sure that - /// `start_branch` and `finish_branch` calls - /// are paired! - fn finish(self) -> Self::Tree; -} - -pub(super) trait TokenSource { - fn token_kind(&self, pos: InputPosition) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: InputPosition) -> bool; - fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool; -} - /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with( sink: S, @@ -67,7 +37,7 @@ pub(super) fn parse_with( /// the public API of the `Parser`. pub(super) struct ParserImpl<'a> { token_source: &'a dyn TokenSource, - pos: InputPosition, + pos: TokenPos, events: Vec, steps: Cell, } @@ -76,7 +46,7 @@ impl<'a> ParserImpl<'a> { fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { ParserImpl { token_source, - pos: InputPosition::new(), + pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0), } diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index 8ebbd3825..e9735e526 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -1,22 +1,21 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, parsing::{ + TokenPos, parser_impl::TokenSource, lexer::Token, }, }; -use std::ops::{Add, AddAssign}; - impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: InputPosition) -> SyntaxKind { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return EOF; } self.tokens[idx].kind } - fn is_token_joint_to_next(&self, pos: InputPosition) -> bool { + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { let idx_curr = pos.0 as usize; let idx_next = pos.0 as usize; if !(idx_next < self.tokens.len()) { @@ -24,7 +23,7 @@ impl<'t> TokenSource for ParserInput<'t> { } self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] } - fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool { + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { return false; @@ -72,26 +71,3 @@ impl<'t> ParserInput<'t> { ParserInput { text, start_offsets, tokens } } } - -#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub(crate) struct InputPosition(u32); - -impl InputPosition { - pub fn new() -> Self { - InputPosition(0) - } -} - -impl Add for InputPosition { - type Output = InputPosition; - - fn add(self, rhs: u32) -> InputPosition { - InputPosition(self.0 + rhs) - } -} - -impl AddAssign for InputPosition { - fn add_assign(&mut self, rhs: u32) { - self.0 += rhs - } -} -- cgit v1.2.3 From e72ad0a2faac98972544dd42316ccf8090717102 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:27:49 +0300 Subject: fix off by one error --- crates/ra_syntax/src/parsing/parser_impl/input.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs index e9735e526..11b32b9ce 100644 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs @@ -17,7 +17,7 @@ impl<'t> TokenSource for ParserInput<'t> { } fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize; + let idx_next = pos.0 as usize + 1; if !(idx_next < self.tokens.len()) { return true; } -- cgit v1.2.3 From 2acb21e8f72896c7a2855ca6042d0ee1870d8643 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:44:06 +0300 Subject: merge parse_impl and parser_api --- crates/ra_syntax/src/parsing/parser_api.rs | 108 +++++++++++++++--- crates/ra_syntax/src/parsing/parser_impl.rs | 165 ++-------------------------- 2 files changed, 102 insertions(+), 171 deletions(-) diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index aed23a6a4..92d7895d3 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -1,10 +1,14 @@ +use std::cell::Cell; + use drop_bomb::DropBomb; use crate::{ - SyntaxKind::{self, ERROR}, + syntax_error::ParseError, + SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ + TokenSource, TokenPos, token_set::TokenSet, - parser_impl::ParserImpl, + parser_impl::event::Event, }, }; @@ -17,9 +21,22 @@ use crate::{ /// tree, but rather a flat stream of events of the form /// "start expression, consume number literal, /// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); +pub(crate) struct Parser<'t> { + token_source: &'t dyn TokenSource, + pos: TokenPos, + events: Vec, + steps: Cell, +} impl<'t> Parser<'t> { + pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { + Parser { token_source, pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec { + self.events + } + /// Returns the kind of the current token. /// If parser has already reached the end of input, /// the special `EOF` kind is returned. @@ -32,7 +49,13 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `>>`. pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - self.0.current2() + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + if self.token_source.is_token_joint_to_next(self.pos) { + Some((c1, c2)) + } else { + None + } } /// Returns the kinds of the current three tokens, if they are not separated @@ -40,13 +63,25 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `=>>`. pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - self.0.current3() + let c1 = self.token_source.token_kind(self.pos); + let c2 = self.token_source.token_kind(self.pos + 1); + let c3 = self.token_source.token_kind(self.pos + 2); + if self.token_source.is_token_joint_to_next(self.pos) + && self.token_source.is_token_joint_to_next(self.pos + 1) + { + Some((c1, c2, c3)) + } else { + None + } } /// Lookahead operation: returns the kind of the next nth /// token. pub(crate) fn nth(&self, n: u32) -> SyntaxKind { - self.0.nth(n) + let steps = self.steps.get(); + assert!(steps <= 10_000_000, "the parser seems stuck"); + self.steps.set(steps + 1); + self.token_source.token_kind(self.pos + n) } /// Checks if the current token is `kind`. @@ -60,20 +95,26 @@ impl<'t> Parser<'t> { } /// Checks if the current token is contextual keyword with text `t`. - pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { - self.0.at_kw(t) + pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.pos, kw) } /// Starts a new node in the syntax tree. All nodes and tokens /// consumed between the `start` and the corresponding `Marker::complete` /// belong to the same node. pub(crate) fn start(&mut self) -> Marker { - Marker::new(self.0.start()) + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) } /// Advances the parser by one token unconditionally. pub(crate) fn bump(&mut self) { - self.0.bump(); + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); } /// Advances the parser by one token, remapping its kind. @@ -83,14 +124,18 @@ impl<'t> Parser<'t> { /// `union` keyword, and keyword is what ends up in the /// final tree. pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { - self.0.bump_remap(kind); + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind, 1); } /// Advances the parser by `n` tokens, remapping its kind. /// This is useful to create compound tokens from parts. For /// example, an `<<` token is two consecutive remapped `<` tokens pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.0.bump_compound(kind, n); + self.do_bump(kind, n); } /// Emit error with the `message` @@ -98,7 +143,8 @@ impl<'t> Parser<'t> { /// structured errors with spans and notes, like rustc /// does. pub(crate) fn error>(&mut self, message: T) { - self.0.error(message.into()) + let msg = ParseError(message.into()); + self.push_event(Event::Error { msg }) } /// Consume the next token if `kind` matches. @@ -136,6 +182,15 @@ impl<'t> Parser<'t> { m.complete(self, ERROR); }; } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.pos += u32::from(n_raw_tokens); + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event) + } } /// See `Parser::start`. @@ -154,7 +209,14 @@ impl Marker { /// operation like `.precede()` to deal with forward_parent. pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { self.bomb.defuse(); - p.0.complete(self.pos, kind); + let idx = self.pos as usize; + match p.events[idx] { + Event::Start { kind: ref mut slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + p.push_event(Event::Finish); CompletedMarker::new(self.pos, kind) } @@ -162,7 +224,13 @@ impl Marker { /// are attached to its parent instead. pub(crate) fn abandon(mut self, p: &mut Parser) { self.bomb.defuse(); - p.0.abandon(self.pos); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } } } @@ -186,7 +254,15 @@ impl CompletedMarker { /// then mark `NEWSTART` as `START`'s parent with saving its relative /// distance to `NEWSTART` into forward_parent(=2 in this case); pub(crate) fn precede(self, p: &mut Parser) -> Marker { - Marker::new(p.0.precede(self.0)) + let new_pos = p.start(); + let idx = self.0 as usize; + match p.events[idx] { + Event::Start { ref mut forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.0); + } + _ => unreachable!(), + } + new_pos } pub(crate) fn kind(&self) -> SyntaxKind { diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 89439e074..6eed0e656 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -1,20 +1,13 @@ -mod event; -pub(crate) mod input; - -use std::cell::Cell; - -use crate::{ - syntax_error::ParseError, - parsing::{ - TreeSink, TokenSource, TokenPos, - lexer::Token, - parser_api::Parser, - parser_impl::event::{Event, EventProcessor}, - }, +pub(super) mod event; +pub(super) mod input; + +use crate::parsing::{ + TreeSink, TokenSource, + lexer::Token, + parser_api::Parser, + parser_impl::event::EventProcessor, }; -use crate::SyntaxKind::{self, EOF, TOMBSTONE}; - /// Parse a sequence of tokens into the representative node tree pub(super) fn parse_with( sink: S, @@ -24,147 +17,9 @@ pub(super) fn parse_with( ) -> S::Tree { let mut events = { let input = input::ParserInput::new(text, tokens); - let parser_impl = ParserImpl::new(&input); - let mut parser_api = Parser(parser_impl); + let mut parser_api = Parser::new(&input); parser(&mut parser_api); - parser_api.0.into_events() + parser_api.finish() }; EventProcessor::new(sink, text, tokens, &mut events).process().finish() } - -/// Implementation details of `Parser`, extracted -/// to a separate struct in order not to pollute -/// the public API of the `Parser`. -pub(super) struct ParserImpl<'a> { - token_source: &'a dyn TokenSource, - pos: TokenPos, - events: Vec, - steps: Cell, -} - -impl<'a> ParserImpl<'a> { - fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { - ParserImpl { - token_source, - pos: TokenPos::default(), - events: Vec::new(), - steps: Cell::new(0), - } - } - - fn into_events(self) -> Vec { - assert_eq!(self.nth(0), EOF); - self.events - } - - pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - if self.token_source.is_token_joint_to_next(self.pos) { - Some((c1, c2)) - } else { - None - } - } - - pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - let c3 = self.token_source.token_kind(self.pos + 2); - if self.token_source.is_token_joint_to_next(self.pos) - && self.token_source.is_token_joint_to_next(self.pos + 1) - { - Some((c1, c2, c3)) - } else { - None - } - } - - /// Get the syntax kind of the nth token. - pub(super) fn nth(&self, n: u32) -> SyntaxKind { - let steps = self.steps.get(); - assert!(steps <= 10_000_000, "the parser seems stuck"); - self.steps.set(steps + 1); - self.token_source.token_kind(self.pos + n) - } - - pub(super) fn at_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.pos, kw) - } - - /// Start parsing right behind the last event. - pub(super) fn start(&mut self) -> u32 { - let pos = self.events.len() as u32; - self.push_event(Event::tombstone()); - pos - } - - /// Advances the parser by one token unconditionally. - pub(super) fn bump(&mut self) { - let kind = self.nth(0); - if kind == EOF { - return; - } - self.do_bump(kind, 1); - } - - pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { - if self.nth(0) == EOF { - // TODO: panic!? - return; - } - self.do_bump(kind, 1); - } - - pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.do_bump(kind, n); - } - - fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.pos += u32::from(n_raw_tokens); - self.push_event(Event::Token { kind, n_raw_tokens }); - } - - /// Append one Error event to the back of events. - pub(super) fn error(&mut self, msg: String) { - self.push_event(Event::Error { msg: ParseError(msg) }) - } - - /// Complete an event with appending a `Finish` event. - pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { - match self.events[pos as usize] { - Event::Start { kind: ref mut slot, .. } => { - *slot = kind; - } - _ => unreachable!(), - } - self.push_event(Event::Finish); - } - - /// Ignore the dummy `Start` event. - pub(super) fn abandon(&mut self, pos: u32) { - let idx = pos as usize; - if idx == self.events.len() - 1 { - match self.events.pop() { - Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), - _ => unreachable!(), - } - } - } - - /// Save the relative distance of a completed event to its forward_parent. - pub(super) fn precede(&mut self, pos: u32) -> u32 { - let new_pos = self.start(); - match self.events[pos as usize] { - Event::Start { ref mut forward_parent, .. } => { - *forward_parent = Some(new_pos - pos); - } - _ => unreachable!(), - } - new_pos - } - - fn push_event(&mut self, event: Event) { - self.events.push(event) - } -} -- cgit v1.2.3 From cce23fddba4241202ebd29cce44db4ce9a08793a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:52:32 +0300 Subject: flattern module structure --- crates/ra_syntax/src/parsing.rs | 29 ++- crates/ra_syntax/src/parsing/event.rs | 254 ++++++++++++++++++++++ crates/ra_syntax/src/parsing/input.rs | 72 ++++++ crates/ra_syntax/src/parsing/parser_api.rs | 2 +- crates/ra_syntax/src/parsing/parser_impl/event.rs | 254 ---------------------- crates/ra_syntax/src/parsing/parser_impl/input.rs | 73 ------- crates/ra_syntax/src/parsing/reparsing.rs | 6 +- 7 files changed, 353 insertions(+), 337 deletions(-) create mode 100644 crates/ra_syntax/src/parsing/event.rs create mode 100644 crates/ra_syntax/src/parsing/input.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/event.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/input.rs diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 6c2c5f78b..f74c365d5 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -2,14 +2,20 @@ mod token_set; mod builder; mod lexer; -mod parser_impl; +mod event; +mod input; mod parser_api; mod grammar; mod reparsing; use crate::{ SyntaxError, SyntaxKind, SmolStr, - parsing::builder::GreenBuilder, + parsing::{ + builder::GreenBuilder, + input::ParserInput, + event::EventProcessor, + parser_api::Parser, + }, syntax_node::GreenNode, }; @@ -19,9 +25,22 @@ pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); - let (green, errors) = - parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); - (green, errors) + parse_with(GreenBuilder::new(), text, &tokens, grammar::root) +} + +fn parse_with( + tree_sink: S, + text: &str, + tokens: &[Token], + f: fn(&mut Parser), +) -> S::Tree { + let mut events = { + let input = ParserInput::new(text, &tokens); + let mut p = Parser::new(&input); + f(&mut p); + p.finish() + }; + EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() } /// `TreeSink` abstracts details of a particular syntax tree implementation. diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs new file mode 100644 index 000000000..893a42e9a --- /dev/null +++ b/crates/ra_syntax/src/parsing/event.rs @@ -0,0 +1,254 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `TreeSink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. +use std::mem; + +use crate::{ + SmolStr, + SyntaxKind::{self, *}, + TextRange, TextUnit, + syntax_error::{ + ParseError, + SyntaxError, + SyntaxErrorKind, + }, + parsing::{ + lexer::Token, + TreeSink, + }, +}; + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// saves the position of current event's parent. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// + /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// + /// And the tree would look like this + /// + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// + /// See also `CompletedMarker::precede`. + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + msg: ParseError, + }, +} + +impl Event { + pub(crate) fn tombstone() -> Self { + Event::Start { kind: TOMBSTONE, forward_parent: None } + } +} + +pub(super) struct EventProcessor<'a, S: TreeSink> { + sink: S, + text_pos: TextUnit, + text: &'a str, + token_pos: usize, + tokens: &'a [Token], + events: &'a mut [Event], +} + +impl<'a, S: TreeSink> EventProcessor<'a, S> { + pub(super) fn new( + sink: S, + text: &'a str, + tokens: &'a [Token], + events: &'a mut [Event], + ) -> EventProcessor<'a, S> { + EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } + } + + /// Generate the syntax tree with the control of events. + pub(crate) fn process(mut self) -> S { + let mut forward_parents = Vec::new(); + + for i in 0..self.events.len() { + match mem::replace(&mut self.events[i], Event::tombstone()) { + Event::Start { kind: TOMBSTONE, .. } => (), + + Event::Start { kind, forward_parent } => { + // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, + // in the normal control flow, the parent-child relation: `A -> B -> C`, + // while with the magic forward_parent, it writes: `C <- B <- A`. + + // append `A` into parents. + forward_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + // append `A`'s forward_parent `B` + fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { + Event::Start { kind, forward_parent } => { + forward_parents.push(kind); + forward_parent + } + _ => unreachable!(), + }; + // append `B`'s forward_parent `C` in the next stage. + } + + for kind in forward_parents.drain(..).rev() { + self.start(kind); + } + } + Event::Finish => { + let is_last = i == self.events.len() - 1; + self.finish(is_last); + } + Event::Token { kind, n_raw_tokens } => { + self.eat_trivias(); + let n_raw_tokens = n_raw_tokens as usize; + let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] + .iter() + .map(|it| it.len) + .sum::(); + self.leaf(kind, len, n_raw_tokens); + } + Event::Error { msg } => self + .sink + .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), + } + } + self.sink + } + + /// Add the node into syntax tree but discard the comments/whitespaces. + fn start(&mut self, kind: SyntaxKind) { + if kind == SOURCE_FILE { + self.sink.start_branch(kind); + return; + } + let n_trivias = + self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); + let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; + let mut trivia_end = + self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); + + let n_attached_trivias = { + let leading_trivias = leading_trivias.iter().rev().map(|it| { + let next_end = trivia_end - it.len; + let range = TextRange::from_to(next_end, trivia_end); + trivia_end = next_end; + (it.kind, &self.text[range]) + }); + n_attached_trivias(kind, leading_trivias) + }; + self.eat_n_trivias(n_trivias - n_attached_trivias); + self.sink.start_branch(kind); + self.eat_n_trivias(n_attached_trivias); + } + + fn finish(&mut self, is_last: bool) { + if is_last { + self.eat_trivias() + } + self.sink.finish_branch(); + } + + fn eat_trivias(&mut self) { + while let Some(&token) = self.tokens.get(self.token_pos) { + if !token.kind.is_trivia() { + break; + } + self.leaf(token.kind, token.len, 1); + } + } + + fn eat_n_trivias(&mut self, n: usize) { + for _ in 0..n { + let token = self.tokens[self.token_pos]; + assert!(token.kind.is_trivia()); + self.leaf(token.kind, token.len, 1); + } + } + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { + let range = TextRange::offset_len(self.text_pos, len); + let text: SmolStr = self.text[range].into(); + self.text_pos += len; + self.token_pos += n_tokens; + self.sink.leaf(kind, text); + } +} + +fn n_attached_trivias<'a>( + kind: SyntaxKind, + trivias: impl Iterator, +) -> usize { + match kind { + CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF + | MODULE | NAMED_FIELD_DEF => { + let mut res = 0; + for (i, (kind, text)) in trivias.enumerate() { + match kind { + WHITESPACE => { + if text.contains("\n\n") { + break; + } + } + COMMENT => { + res = i + 1; + } + _ => (), + } + } + res + } + _ => 0, + } +} diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs new file mode 100644 index 000000000..0f1810df5 --- /dev/null +++ b/crates/ra_syntax/src/parsing/input.rs @@ -0,0 +1,72 @@ +use crate::{ + SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, + parsing::{ + TokenPos, TokenSource, + lexer::Token, + }, +}; + +impl<'t> TokenSource for ParserInput<'t> { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { + let idx_curr = pos.0 as usize; + let idx_next = pos.0 as usize + 1; + if !(idx_next < self.tokens.len()) { + return true; + } + self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + } + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return false; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + + self.text[range] == *kw + } +} + +pub(crate) struct ParserInput<'t> { + text: &'t str, + /// start position of each token(expect whitespace and comment) + /// ```non-rust + /// struct Foo; + /// ^------^--- + /// | | ^- + /// 0 7 10 + /// ``` + /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` + start_offsets: Vec, + /// non-whitespace/comment tokens + /// ```non-rust + /// struct Foo {} + /// ^^^^^^ ^^^ ^^ + /// ``` + /// tokens: `[struct, Foo, {, }]` + tokens: Vec, +} + +impl<'t> ParserInput<'t> { + /// Generate input from tokens(expect comment and whitespace). + pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = 0.into(); + for &token in raw_tokens.iter() { + if !token.kind.is_trivia() { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + ParserInput { text, start_offsets, tokens } + } +} diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 92d7895d3..99f6183a4 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -8,7 +8,7 @@ use crate::{ parsing::{ TokenSource, TokenPos, token_set::TokenSet, - parser_impl::event::Event, + event::Event, }, }; diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs deleted file mode 100644 index 9663fba35..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ /dev/null @@ -1,254 +0,0 @@ -//! This module provides a way to construct a `File`. -//! It is intended to be completely decoupled from the -//! parser, so as to allow to evolve the tree representation -//! and the parser algorithm independently. -//! -//! The `TreeSink` trait is the bridge between the parser and the -//! tree builder: the parser produces a stream of events like -//! `start node`, `finish node`, and `FileBuilder` converts -//! this stream to a real tree. -use std::mem; - -use crate::{ - SmolStr, - SyntaxKind::{self, *}, - TextRange, TextUnit, - syntax_error::{ - ParseError, - SyntaxError, - SyntaxErrorKind, - }, - parsing::{ - lexer::Token, - parser_impl::TreeSink, - }, -}; - -/// `Parser` produces a flat list of `Event`s. -/// They are converted to a tree-structure in -/// a separate pass, via `TreeBuilder`. -#[derive(Debug)] -pub(crate) enum Event { - /// This event signifies the start of the node. - /// It should be either abandoned (in which case the - /// `kind` is `TOMBSTONE`, and the event is ignored), - /// or completed via a `Finish` event. - /// - /// All tokens between a `Start` and a `Finish` would - /// become the children of the respective node. - /// - /// For left-recursive syntactic constructs, the parser produces - /// a child node before it sees a parent. `forward_parent` - /// saves the position of current event's parent. - /// - /// Consider this path - /// - /// foo::bar - /// - /// The events for it would look like this: - /// - /// - /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH - /// | /\ - /// | | - /// +------forward-parent------+ - /// - /// And the tree would look like this - /// - /// +--PATH---------+ - /// | | | - /// | | | - /// | '::' 'bar' - /// | - /// PATH - /// | - /// 'foo' - /// - /// See also `CompletedMarker::precede`. - Start { - kind: SyntaxKind, - forward_parent: Option, - }, - - /// Complete the previous `Start` event - Finish, - - /// Produce a single leaf-element. - /// `n_raw_tokens` is used to glue complex contextual tokens. - /// For example, lexer tokenizes `>>` as `>`, `>`, and - /// `n_raw_tokens = 2` is used to produced a single `>>`. - Token { - kind: SyntaxKind, - n_raw_tokens: u8, - }, - - Error { - msg: ParseError, - }, -} - -impl Event { - pub(crate) fn tombstone() -> Self { - Event::Start { kind: TOMBSTONE, forward_parent: None } - } -} - -pub(super) struct EventProcessor<'a, S: TreeSink> { - sink: S, - text_pos: TextUnit, - text: &'a str, - token_pos: usize, - tokens: &'a [Token], - events: &'a mut [Event], -} - -impl<'a, S: TreeSink> EventProcessor<'a, S> { - pub(super) fn new( - sink: S, - text: &'a str, - tokens: &'a [Token], - events: &'a mut [Event], - ) -> EventProcessor<'a, S> { - EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } - } - - /// Generate the syntax tree with the control of events. - pub(super) fn process(mut self) -> S { - let mut forward_parents = Vec::new(); - - for i in 0..self.events.len() { - match mem::replace(&mut self.events[i], Event::tombstone()) { - Event::Start { kind: TOMBSTONE, .. } => (), - - Event::Start { kind, forward_parent } => { - // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, - // in the normal control flow, the parent-child relation: `A -> B -> C`, - // while with the magic forward_parent, it writes: `C <- B <- A`. - - // append `A` into parents. - forward_parents.push(kind); - let mut idx = i; - let mut fp = forward_parent; - while let Some(fwd) = fp { - idx += fwd as usize; - // append `A`'s forward_parent `B` - fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { - Event::Start { kind, forward_parent } => { - forward_parents.push(kind); - forward_parent - } - _ => unreachable!(), - }; - // append `B`'s forward_parent `C` in the next stage. - } - - for kind in forward_parents.drain(..).rev() { - self.start(kind); - } - } - Event::Finish => { - let is_last = i == self.events.len() - 1; - self.finish(is_last); - } - Event::Token { kind, n_raw_tokens } => { - self.eat_trivias(); - let n_raw_tokens = n_raw_tokens as usize; - let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] - .iter() - .map(|it| it.len) - .sum::(); - self.leaf(kind, len, n_raw_tokens); - } - Event::Error { msg } => self - .sink - .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), - } - } - self.sink - } - - /// Add the node into syntax tree but discard the comments/whitespaces. - fn start(&mut self, kind: SyntaxKind) { - if kind == SOURCE_FILE { - self.sink.start_branch(kind); - return; - } - let n_trivias = - self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); - let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; - let mut trivia_end = - self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); - - let n_attached_trivias = { - let leading_trivias = leading_trivias.iter().rev().map(|it| { - let next_end = trivia_end - it.len; - let range = TextRange::from_to(next_end, trivia_end); - trivia_end = next_end; - (it.kind, &self.text[range]) - }); - n_attached_trivias(kind, leading_trivias) - }; - self.eat_n_trivias(n_trivias - n_attached_trivias); - self.sink.start_branch(kind); - self.eat_n_trivias(n_attached_trivias); - } - - fn finish(&mut self, is_last: bool) { - if is_last { - self.eat_trivias() - } - self.sink.finish_branch(); - } - - fn eat_trivias(&mut self) { - while let Some(&token) = self.tokens.get(self.token_pos) { - if !token.kind.is_trivia() { - break; - } - self.leaf(token.kind, token.len, 1); - } - } - - fn eat_n_trivias(&mut self, n: usize) { - for _ in 0..n { - let token = self.tokens[self.token_pos]; - assert!(token.kind.is_trivia()); - self.leaf(token.kind, token.len, 1); - } - } - - fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { - let range = TextRange::offset_len(self.text_pos, len); - let text: SmolStr = self.text[range].into(); - self.text_pos += len; - self.token_pos += n_tokens; - self.sink.leaf(kind, text); - } -} - -fn n_attached_trivias<'a>( - kind: SyntaxKind, - trivias: impl Iterator, -) -> usize { - match kind { - CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF - | MODULE | NAMED_FIELD_DEF => { - let mut res = 0; - for (i, (kind, text)) in trivias.enumerate() { - match kind { - WHITESPACE => { - if text.contains("\n\n") { - break; - } - } - COMMENT => { - res = i + 1; - } - _ => (), - } - } - res - } - _ => 0, - } -} diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs deleted file mode 100644 index 11b32b9ce..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::{ - SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, - parsing::{ - TokenPos, - parser_impl::TokenSource, - lexer::Token, - }, -}; - -impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return EOF; - } - self.tokens[idx].kind - } - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { - let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize + 1; - if !(idx_next < self.tokens.len()) { - return true; - } - self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] - } - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return false; - } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); - - self.text[range] == *kw - } -} - -pub(crate) struct ParserInput<'t> { - text: &'t str, - /// start position of each token(expect whitespace and comment) - /// ```non-rust - /// struct Foo; - /// ^------^--- - /// | | ^- - /// 0 7 10 - /// ``` - /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` - start_offsets: Vec, - /// non-whitespace/comment tokens - /// ```non-rust - /// struct Foo {} - /// ^^^^^^ ^^^ ^^ - /// ``` - /// tokens: `[struct, Foo, {, }]` - tokens: Vec, -} - -impl<'t> ParserInput<'t> { - /// Generate input from tokens(expect comment and whitespace). - pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { - let mut tokens = Vec::new(); - let mut start_offsets = Vec::new(); - let mut len = 0.into(); - for &token in raw_tokens.iter() { - if !token.kind.is_trivia() { - tokens.push(token); - start_offsets.push(len); - } - len += token.len; - } - - ParserInput { text, start_offsets, tokens } - } -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index edf3fa291..f45326dff 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -4,8 +4,7 @@ use crate::{ syntax_node::{GreenNode, SyntaxNode}, syntax_error::SyntaxError, parsing::{ - grammar, - parser_impl, + grammar, parse_with, builder::GreenBuilder, parser_api::Parser, lexer::{tokenize, Token}, @@ -62,8 +61,7 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = - parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser); + let (green, new_errors) = parse_with(GreenBuilder::new(), &text, &tokens, reparser); Some((node, green, new_errors)) } -- cgit v1.2.3 From 4c1f9b8d4e9ab9ba3b16d2b03f3c8bcc7f61706e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:58:56 +0300 Subject: remove TokenPos --- crates/ra_syntax/src/parsing.rs | 23 +++-------------------- crates/ra_syntax/src/parsing/input.rs | 24 ++++++++++-------------- crates/ra_syntax/src/parsing/parser_api.rs | 30 +++++++++++++++--------------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index f74c365d5..5de6ff8c1 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -69,24 +69,7 @@ trait TreeSink { /// /// Hopefully this will allow us to treat text and token trees in the same way! trait TokenSource { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool; - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool; -} - -#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)] -pub(crate) struct TokenPos(pub u32); - -impl std::ops::Add for TokenPos { - type Output = TokenPos; - - fn add(self, rhs: u32) -> TokenPos { - TokenPos(self.0 + rhs) - } -} - -impl std::ops::AddAssign for TokenPos { - fn add_assign(&mut self, rhs: u32) { - self.0 += rhs - } + fn token_kind(&self, pos: usize) -> SyntaxKind; + fn is_token_joint_to_next(&self, pos: usize) -> bool; + fn is_keyword(&self, pos: usize, kw: &str) -> bool; } diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs index 0f1810df5..96c03bb11 100644 --- a/crates/ra_syntax/src/parsing/input.rs +++ b/crates/ra_syntax/src/parsing/input.rs @@ -1,33 +1,29 @@ use crate::{ SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, parsing::{ - TokenPos, TokenSource, + TokenSource, lexer::Token, }, }; impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if !(pos < self.tokens.len()) { return EOF; } - self.tokens[idx].kind + self.tokens[pos].kind } - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { - let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize + 1; - if !(idx_next < self.tokens.len()) { + fn is_token_joint_to_next(&self, pos: usize) -> bool { + if !(pos + 1 < self.tokens.len()) { return true; } - self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1] } - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + if !(pos < self.tokens.len()) { return false; } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len); self.text[range] == *kw } diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 99f6183a4..988fcb518 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -6,7 +6,7 @@ use crate::{ syntax_error::ParseError, SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ - TokenSource, TokenPos, + TokenSource, token_set::TokenSet, event::Event, }, @@ -23,14 +23,14 @@ use crate::{ /// finish expression". See `Event` docs for more. pub(crate) struct Parser<'t> { token_source: &'t dyn TokenSource, - pos: TokenPos, + token_pos: usize, events: Vec, steps: Cell, } impl<'t> Parser<'t> { pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { - Parser { token_source, pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0) } + Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } } pub(crate) fn finish(self) -> Vec { @@ -49,9 +49,9 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `>>`. pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - if self.token_source.is_token_joint_to_next(self.pos) { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + if self.token_source.is_token_joint_to_next(self.token_pos) { Some((c1, c2)) } else { None @@ -63,11 +63,11 @@ impl<'t> Parser<'t> { /// /// Useful for parsing things like `=>>`. pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.pos); - let c2 = self.token_source.token_kind(self.pos + 1); - let c3 = self.token_source.token_kind(self.pos + 2); - if self.token_source.is_token_joint_to_next(self.pos) - && self.token_source.is_token_joint_to_next(self.pos + 1) + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + let c3 = self.token_source.token_kind(self.token_pos + 2); + if self.token_source.is_token_joint_to_next(self.token_pos) + && self.token_source.is_token_joint_to_next(self.token_pos + 1) { Some((c1, c2, c3)) } else { @@ -77,11 +77,11 @@ impl<'t> Parser<'t> { /// Lookahead operation: returns the kind of the next nth /// token. - pub(crate) fn nth(&self, n: u32) -> SyntaxKind { + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { let steps = self.steps.get(); assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - self.token_source.token_kind(self.pos + n) + self.token_source.token_kind(self.token_pos + n) } /// Checks if the current token is `kind`. @@ -96,7 +96,7 @@ impl<'t> Parser<'t> { /// Checks if the current token is contextual keyword with text `t`. pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.pos, kw) + self.token_source.is_keyword(self.token_pos, kw) } /// Starts a new node in the syntax tree. All nodes and tokens @@ -184,7 +184,7 @@ impl<'t> Parser<'t> { } fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.pos += u32::from(n_raw_tokens); + self.token_pos += usize::from(n_raw_tokens); self.push_event(Event::Token { kind, n_raw_tokens }); } -- cgit v1.2.3 From 61992dc1cd4956038e3c15439c1203f21e05af06 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 23:05:59 +0300 Subject: simplify --- crates/ra_syntax/src/parsing.rs | 4 +- crates/ra_syntax/src/parsing/grammar.rs | 2 +- crates/ra_syntax/src/parsing/parser.rs | 271 ++++++++++++++++++++++++++++ crates/ra_syntax/src/parsing/parser_api.rs | 271 ---------------------------- crates/ra_syntax/src/parsing/parser_impl.rs | 25 --- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- 6 files changed, 275 insertions(+), 300 deletions(-) create mode 100644 crates/ra_syntax/src/parsing/parser.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_api.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl.rs diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 5de6ff8c1..941ec501e 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -4,7 +4,7 @@ mod builder; mod lexer; mod event; mod input; -mod parser_api; +mod parser; mod grammar; mod reparsing; @@ -14,7 +14,7 @@ use crate::{ builder::GreenBuilder, input::ParserInput, event::EventProcessor, - parser_api::Parser, + parser::Parser, }, syntax_node::GreenNode, }; diff --git a/crates/ra_syntax/src/parsing/grammar.rs b/crates/ra_syntax/src/parsing/grammar.rs index bcdcd9f57..7ca9c223c 100644 --- a/crates/ra_syntax/src/parsing/grammar.rs +++ b/crates/ra_syntax/src/parsing/grammar.rs @@ -41,7 +41,7 @@ use crate::{ SyntaxKind::{self, *}, parsing::{ token_set::TokenSet, - parser_api::{CompletedMarker, Marker, Parser} + parser::{CompletedMarker, Marker, Parser} }, }; diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs new file mode 100644 index 000000000..988fcb518 --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser.rs @@ -0,0 +1,271 @@ +use std::cell::Cell; + +use drop_bomb::DropBomb; + +use crate::{ + syntax_error::ParseError, + SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, + parsing::{ + TokenSource, + token_set::TokenSet, + event::Event, + }, +}; + +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the `grammar` module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. +pub(crate) struct Parser<'t> { + token_source: &'t dyn TokenSource, + token_pos: usize, + events: Vec, + steps: Cell, +} + +impl<'t> Parser<'t> { + pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { + Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } + } + + pub(crate) fn finish(self) -> Vec { + self.events + } + + /// Returns the kind of the current token. + /// If parser has already reached the end of input, + /// the special `EOF` kind is returned. + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + /// Returns the kinds of the current two tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `>>`. + pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + if self.token_source.is_token_joint_to_next(self.token_pos) { + Some((c1, c2)) + } else { + None + } + } + + /// Returns the kinds of the current three tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `=>>`. + pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { + let c1 = self.token_source.token_kind(self.token_pos); + let c2 = self.token_source.token_kind(self.token_pos + 1); + let c3 = self.token_source.token_kind(self.token_pos + 2); + if self.token_source.is_token_joint_to_next(self.token_pos) + && self.token_source.is_token_joint_to_next(self.token_pos + 1) + { + Some((c1, c2, c3)) + } else { + None + } + } + + /// Lookahead operation: returns the kind of the next nth + /// token. + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + let steps = self.steps.get(); + assert!(steps <= 10_000_000, "the parser seems stuck"); + self.steps.set(steps + 1); + self.token_source.token_kind(self.token_pos + n) + } + + /// Checks if the current token is `kind`. + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.current() == kind + } + + /// Checks if the current token is in `kinds`. + pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { + kinds.contains(self.current()) + } + + /// Checks if the current token is contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { + self.token_source.is_keyword(self.token_pos, kw) + } + + /// Starts a new node in the syntax tree. All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. + pub(crate) fn start(&mut self) -> Marker { + let pos = self.events.len() as u32; + self.push_event(Event::tombstone()); + Marker::new(pos) + } + + /// Advances the parser by one token unconditionally. + pub(crate) fn bump(&mut self) { + let kind = self.nth(0); + if kind == EOF { + return; + } + self.do_bump(kind, 1); + } + + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. For example, the lexer creates an `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and keyword is what ends up in the + /// final tree. + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + if self.nth(0) == EOF { + // TODO: panic!? + return; + } + self.do_bump(kind, 1); + } + + /// Advances the parser by `n` tokens, remapping its kind. + /// This is useful to create compound tokens from parts. For + /// example, an `<<` token is two consecutive remapped `<` tokens + pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.do_bump(kind, n); + } + + /// Emit error with the `message` + /// TODO: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. + pub(crate) fn error>(&mut self, message: T) { + let msg = ParseError(message.into()); + self.push_event(Event::Error { msg }) + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + self.bump(); + true + } + + /// Consume the next token if it is `kind` or emit an error + /// otherwise. + pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. + pub(crate) fn err_and_bump(&mut self, message: &str) { + self.err_recover(message, TokenSet::empty()); + } + + /// Create an error node and consume the next token. + pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { + if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) { + self.error(message); + } else { + let m = self.start(); + self.error(message); + self.bump(); + m.complete(self, ERROR); + }; + } + + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.token_pos += usize::from(n_raw_tokens); + self.push_event(Event::Token { kind, n_raw_tokens }); + } + + fn push_event(&mut self, event: Event) { + self.events.push(event) + } +} + +/// See `Parser::start`. +pub(crate) struct Marker { + pos: u32, + bomb: DropBomb, +} + +impl Marker { + fn new(pos: u32) -> Marker { + Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } + } + + /// Finishes the syntax tree node and assigns `kind` to it, + /// and mark the create a `CompletedMarker` for possible future + /// operation like `.precede()` to deal with forward_parent. + pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + let idx = self.pos as usize; + match p.events[idx] { + Event::Start { kind: ref mut slot, .. } => { + *slot = kind; + } + _ => unreachable!(), + } + p.push_event(Event::Finish); + CompletedMarker::new(self.pos, kind) + } + + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. + pub(crate) fn abandon(mut self, p: &mut Parser) { + self.bomb.defuse(); + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), + _ => unreachable!(), + } + } + } +} + +pub(crate) struct CompletedMarker(u32, SyntaxKind); + +impl CompletedMarker { + fn new(pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker(pos, kind) + } + + /// This method allows to create a new node which starts + /// *before* the current one. That is, parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows to do exactly + /// that. See also docs about `forward_parent` in `Event::Start`. + /// + /// Given completed events `[START, FINISH]` and its corresponding + /// `CompletedMarker(pos: 0, _)`. + /// Append a new `START` events as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent with saving its relative + /// distance to `NEWSTART` into forward_parent(=2 in this case); + pub(crate) fn precede(self, p: &mut Parser) -> Marker { + let new_pos = p.start(); + let idx = self.0 as usize; + match p.events[idx] { + Event::Start { ref mut forward_parent, .. } => { + *forward_parent = Some(new_pos.pos - self.0); + } + _ => unreachable!(), + } + new_pos + } + + pub(crate) fn kind(&self) -> SyntaxKind { + self.1 + } +} diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs deleted file mode 100644 index 988fcb518..000000000 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ /dev/null @@ -1,271 +0,0 @@ -use std::cell::Cell; - -use drop_bomb::DropBomb; - -use crate::{ - syntax_error::ParseError, - SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, - parsing::{ - TokenSource, - token_set::TokenSet, - event::Event, - }, -}; - -/// `Parser` struct provides the low-level API for -/// navigating through the stream of tokens and -/// constructing the parse tree. The actual parsing -/// happens in the `grammar` module. -/// -/// However, the result of this `Parser` is not a real -/// tree, but rather a flat stream of events of the form -/// "start expression, consume number literal, -/// finish expression". See `Event` docs for more. -pub(crate) struct Parser<'t> { - token_source: &'t dyn TokenSource, - token_pos: usize, - events: Vec, - steps: Cell, -} - -impl<'t> Parser<'t> { - pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { - Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } - } - - pub(crate) fn finish(self) -> Vec { - self.events - } - - /// Returns the kind of the current token. - /// If parser has already reached the end of input, - /// the special `EOF` kind is returned. - pub(crate) fn current(&self) -> SyntaxKind { - self.nth(0) - } - - /// Returns the kinds of the current two tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `>>`. - pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.token_pos); - let c2 = self.token_source.token_kind(self.token_pos + 1); - if self.token_source.is_token_joint_to_next(self.token_pos) { - Some((c1, c2)) - } else { - None - } - } - - /// Returns the kinds of the current three tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `=>>`. - pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.token_source.token_kind(self.token_pos); - let c2 = self.token_source.token_kind(self.token_pos + 1); - let c3 = self.token_source.token_kind(self.token_pos + 2); - if self.token_source.is_token_joint_to_next(self.token_pos) - && self.token_source.is_token_joint_to_next(self.token_pos + 1) - { - Some((c1, c2, c3)) - } else { - None - } - } - - /// Lookahead operation: returns the kind of the next nth - /// token. - pub(crate) fn nth(&self, n: usize) -> SyntaxKind { - let steps = self.steps.get(); - assert!(steps <= 10_000_000, "the parser seems stuck"); - self.steps.set(steps + 1); - self.token_source.token_kind(self.token_pos + n) - } - - /// Checks if the current token is `kind`. - pub(crate) fn at(&self, kind: SyntaxKind) -> bool { - self.current() == kind - } - - /// Checks if the current token is in `kinds`. - pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { - kinds.contains(self.current()) - } - - /// Checks if the current token is contextual keyword with text `t`. - pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { - self.token_source.is_keyword(self.token_pos, kw) - } - - /// Starts a new node in the syntax tree. All nodes and tokens - /// consumed between the `start` and the corresponding `Marker::complete` - /// belong to the same node. - pub(crate) fn start(&mut self) -> Marker { - let pos = self.events.len() as u32; - self.push_event(Event::tombstone()); - Marker::new(pos) - } - - /// Advances the parser by one token unconditionally. - pub(crate) fn bump(&mut self) { - let kind = self.nth(0); - if kind == EOF { - return; - } - self.do_bump(kind, 1); - } - - /// Advances the parser by one token, remapping its kind. - /// This is useful to create contextual keywords from - /// identifiers. For example, the lexer creates an `union` - /// *identifier* token, but the parser remaps it to the - /// `union` keyword, and keyword is what ends up in the - /// final tree. - pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { - if self.nth(0) == EOF { - // TODO: panic!? - return; - } - self.do_bump(kind, 1); - } - - /// Advances the parser by `n` tokens, remapping its kind. - /// This is useful to create compound tokens from parts. For - /// example, an `<<` token is two consecutive remapped `<` tokens - pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.do_bump(kind, n); - } - - /// Emit error with the `message` - /// TODO: this should be much more fancy and support - /// structured errors with spans and notes, like rustc - /// does. - pub(crate) fn error>(&mut self, message: T) { - let msg = ParseError(message.into()); - self.push_event(Event::Error { msg }) - } - - /// Consume the next token if `kind` matches. - pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { - if !self.at(kind) { - return false; - } - self.bump(); - true - } - - /// Consume the next token if it is `kind` or emit an error - /// otherwise. - pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { - if self.eat(kind) { - return true; - } - self.error(format!("expected {:?}", kind)); - false - } - - /// Create an error node and consume the next token. - pub(crate) fn err_and_bump(&mut self, message: &str) { - self.err_recover(message, TokenSet::empty()); - } - - /// Create an error node and consume the next token. - pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { - if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) { - self.error(message); - } else { - let m = self.start(); - self.error(message); - self.bump(); - m.complete(self, ERROR); - }; - } - - fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.token_pos += usize::from(n_raw_tokens); - self.push_event(Event::Token { kind, n_raw_tokens }); - } - - fn push_event(&mut self, event: Event) { - self.events.push(event) - } -} - -/// See `Parser::start`. -pub(crate) struct Marker { - pos: u32, - bomb: DropBomb, -} - -impl Marker { - fn new(pos: u32) -> Marker { - Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } - } - - /// Finishes the syntax tree node and assigns `kind` to it, - /// and mark the create a `CompletedMarker` for possible future - /// operation like `.precede()` to deal with forward_parent. - pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { - self.bomb.defuse(); - let idx = self.pos as usize; - match p.events[idx] { - Event::Start { kind: ref mut slot, .. } => { - *slot = kind; - } - _ => unreachable!(), - } - p.push_event(Event::Finish); - CompletedMarker::new(self.pos, kind) - } - - /// Abandons the syntax tree node. All its children - /// are attached to its parent instead. - pub(crate) fn abandon(mut self, p: &mut Parser) { - self.bomb.defuse(); - let idx = self.pos as usize; - if idx == p.events.len() - 1 { - match p.events.pop() { - Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), - _ => unreachable!(), - } - } - } -} - -pub(crate) struct CompletedMarker(u32, SyntaxKind); - -impl CompletedMarker { - fn new(pos: u32, kind: SyntaxKind) -> Self { - CompletedMarker(pos, kind) - } - - /// This method allows to create a new node which starts - /// *before* the current one. That is, parser could start - /// node `A`, then complete it, and then after parsing the - /// whole `A`, decide that it should have started some node - /// `B` before starting `A`. `precede` allows to do exactly - /// that. See also docs about `forward_parent` in `Event::Start`. - /// - /// Given completed events `[START, FINISH]` and its corresponding - /// `CompletedMarker(pos: 0, _)`. - /// Append a new `START` events as `[START, FINISH, NEWSTART]`, - /// then mark `NEWSTART` as `START`'s parent with saving its relative - /// distance to `NEWSTART` into forward_parent(=2 in this case); - pub(crate) fn precede(self, p: &mut Parser) -> Marker { - let new_pos = p.start(); - let idx = self.0 as usize; - match p.events[idx] { - Event::Start { ref mut forward_parent, .. } => { - *forward_parent = Some(new_pos.pos - self.0); - } - _ => unreachable!(), - } - new_pos - } - - pub(crate) fn kind(&self) -> SyntaxKind { - self.1 - } -} diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs deleted file mode 100644 index 6eed0e656..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub(super) mod event; -pub(super) mod input; - -use crate::parsing::{ - TreeSink, TokenSource, - lexer::Token, - parser_api::Parser, - parser_impl::event::EventProcessor, -}; - -/// Parse a sequence of tokens into the representative node tree -pub(super) fn parse_with( - sink: S, - text: &str, - tokens: &[Token], - parser: fn(&mut Parser), -) -> S::Tree { - let mut events = { - let input = input::ParserInput::new(text, tokens); - let mut parser_api = Parser::new(&input); - parser(&mut parser_api); - parser_api.finish() - }; - EventProcessor::new(sink, text, tokens, &mut events).process().finish() -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index f45326dff..674b15f9a 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -6,7 +6,7 @@ use crate::{ parsing::{ grammar, parse_with, builder::GreenBuilder, - parser_api::Parser, + parser::Parser, lexer::{tokenize, Token}, } }; -- cgit v1.2.3 From 882c47f1870f15cb2aaad8871ccbad1c51520f49 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 23:17:07 +0300 Subject: move syntax error to parser --- crates/ra_syntax/src/parsing.rs | 9 ++++++--- crates/ra_syntax/src/parsing/builder.rs | 19 +++++++++++++------ crates/ra_syntax/src/parsing/event.rs | 11 ++--------- crates/ra_syntax/src/parsing/parser.rs | 3 +-- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- crates/ra_syntax/src/syntax_error.rs | 5 +---- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 941ec501e..138d1394a 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -9,7 +9,7 @@ mod grammar; mod reparsing; use crate::{ - SyntaxError, SyntaxKind, SmolStr, + SyntaxKind, SmolStr, SyntaxError, parsing::{ builder::GreenBuilder, input::ParserInput, @@ -21,11 +21,14 @@ use crate::{ pub use self::lexer::{tokenize, Token}; +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ParseError(pub String); + pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); - parse_with(GreenBuilder::new(), text, &tokens, grammar::root) + parse_with(GreenBuilder::default(), text, &tokens, grammar::root) } fn parse_with( @@ -57,7 +60,7 @@ trait TreeSink { /// branch as current. fn finish_branch(&mut self); - fn error(&mut self, error: SyntaxError); + fn error(&mut self, error: ParseError); /// Complete tree building. Make sure that /// `start_branch` and `finish_branch` calls diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index a05e7f84b..ee0e2cce7 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,19 +1,24 @@ use crate::{ - parsing::TreeSink, + SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, + parsing::{TreeSink, ParseError}, syntax_node::{GreenNode, RaTypes}, - SmolStr, SyntaxKind, SyntaxError, }; use rowan::GreenNodeBuilder; pub(crate) struct GreenBuilder { + text_pos: TextUnit, errors: Vec, inner: GreenNodeBuilder, } -impl GreenBuilder { - pub(crate) fn new() -> GreenBuilder { - GreenBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } +impl Default for GreenBuilder { + fn default() -> GreenBuilder { + GreenBuilder { + text_pos: TextUnit::default(), + errors: Vec::new(), + inner: GreenNodeBuilder::new(), + } } } @@ -21,6 +26,7 @@ impl TreeSink for GreenBuilder { type Tree = (GreenNode, Vec); fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { + self.text_pos += TextUnit::of_str(text.as_str()); self.inner.leaf(kind, text); } @@ -32,7 +38,8 @@ impl TreeSink for GreenBuilder { self.inner.finish_internal(); } - fn error(&mut self, error: SyntaxError) { + fn error(&mut self, error: ParseError) { + let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); self.errors.push(error) } diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs index 893a42e9a..f6f020eab 100644 --- a/crates/ra_syntax/src/parsing/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs @@ -13,14 +13,9 @@ use crate::{ SmolStr, SyntaxKind::{self, *}, TextRange, TextUnit, - syntax_error::{ - ParseError, - SyntaxError, - SyntaxErrorKind, - }, parsing::{ + ParseError, TreeSink, lexer::Token, - TreeSink, }, }; @@ -159,9 +154,7 @@ impl<'a, S: TreeSink> EventProcessor<'a, S> { .sum::(); self.leaf(kind, len, n_raw_tokens); } - Event::Error { msg } => self - .sink - .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), + Event::Error { msg } => self.sink.error(msg), } } self.sink diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs index 988fcb518..923b0f2b2 100644 --- a/crates/ra_syntax/src/parsing/parser.rs +++ b/crates/ra_syntax/src/parsing/parser.rs @@ -3,10 +3,9 @@ use std::cell::Cell; use drop_bomb::DropBomb; use crate::{ - syntax_error::ParseError, SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, parsing::{ - TokenSource, + TokenSource, ParseError, token_set::TokenSet, event::Event, }, diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 674b15f9a..f2d218ab9 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -61,7 +61,7 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = parse_with(GreenBuilder::new(), &text, &tokens, reparser); + let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); Some((node, green, new_errors)) } diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 4ff998090..1a00fcc27 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::{TextRange, TextUnit}; +use crate::{TextRange, TextUnit, parsing::ParseError}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SyntaxError { @@ -95,9 +95,6 @@ pub enum SyntaxErrorKind { InvalidMatchInnerAttr, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ParseError(pub String); - impl fmt::Display for SyntaxErrorKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::SyntaxErrorKind::*; -- cgit v1.2.3