From cce23fddba4241202ebd29cce44db4ce9a08793a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 22:52:32 +0300 Subject: flattern module structure --- crates/ra_syntax/src/parsing.rs | 29 ++- crates/ra_syntax/src/parsing/event.rs | 254 ++++++++++++++++++++++ crates/ra_syntax/src/parsing/input.rs | 72 ++++++ crates/ra_syntax/src/parsing/parser_api.rs | 2 +- crates/ra_syntax/src/parsing/parser_impl/event.rs | 254 ---------------------- crates/ra_syntax/src/parsing/parser_impl/input.rs | 73 ------- crates/ra_syntax/src/parsing/reparsing.rs | 6 +- 7 files changed, 353 insertions(+), 337 deletions(-) create mode 100644 crates/ra_syntax/src/parsing/event.rs create mode 100644 crates/ra_syntax/src/parsing/input.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/event.rs delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/input.rs (limited to 'crates') diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 6c2c5f78b..f74c365d5 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -2,14 +2,20 @@ mod token_set; mod builder; mod lexer; -mod parser_impl; +mod event; +mod input; mod parser_api; mod grammar; mod reparsing; use crate::{ SyntaxError, SyntaxKind, SmolStr, - parsing::builder::GreenBuilder, + parsing::{ + builder::GreenBuilder, + input::ParserInput, + event::EventProcessor, + parser_api::Parser, + }, syntax_node::GreenNode, }; @@ -19,9 +25,22 @@ pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); - let (green, errors) = - parser_impl::parse_with(GreenBuilder::new(), text, &tokens, grammar::root); - (green, errors) + parse_with(GreenBuilder::new(), text, &tokens, grammar::root) +} + +fn parse_with( + tree_sink: S, + text: &str, + tokens: &[Token], + f: fn(&mut Parser), +) -> S::Tree { + let mut events = { + let input = ParserInput::new(text, &tokens); + let mut p = Parser::new(&input); + f(&mut p); + p.finish() + }; + EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() } /// `TreeSink` abstracts details of a particular syntax tree implementation. diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs new file mode 100644 index 000000000..893a42e9a --- /dev/null +++ b/crates/ra_syntax/src/parsing/event.rs @@ -0,0 +1,254 @@ +//! This module provides a way to construct a `File`. +//! It is intended to be completely decoupled from the +//! parser, so as to allow to evolve the tree representation +//! and the parser algorithm independently. +//! +//! The `TreeSink` trait is the bridge between the parser and the +//! tree builder: the parser produces a stream of events like +//! `start node`, `finish node`, and `FileBuilder` converts +//! this stream to a real tree. +use std::mem; + +use crate::{ + SmolStr, + SyntaxKind::{self, *}, + TextRange, TextUnit, + syntax_error::{ + ParseError, + SyntaxError, + SyntaxErrorKind, + }, + parsing::{ + lexer::Token, + TreeSink, + }, +}; + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// saves the position of current event's parent. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// + /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// + /// And the tree would look like this + /// + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// + /// See also `CompletedMarker::precede`. + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + msg: ParseError, + }, +} + +impl Event { + pub(crate) fn tombstone() -> Self { + Event::Start { kind: TOMBSTONE, forward_parent: None } + } +} + +pub(super) struct EventProcessor<'a, S: TreeSink> { + sink: S, + text_pos: TextUnit, + text: &'a str, + token_pos: usize, + tokens: &'a [Token], + events: &'a mut [Event], +} + +impl<'a, S: TreeSink> EventProcessor<'a, S> { + pub(super) fn new( + sink: S, + text: &'a str, + tokens: &'a [Token], + events: &'a mut [Event], + ) -> EventProcessor<'a, S> { + EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } + } + + /// Generate the syntax tree with the control of events. + pub(crate) fn process(mut self) -> S { + let mut forward_parents = Vec::new(); + + for i in 0..self.events.len() { + match mem::replace(&mut self.events[i], Event::tombstone()) { + Event::Start { kind: TOMBSTONE, .. } => (), + + Event::Start { kind, forward_parent } => { + // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, + // in the normal control flow, the parent-child relation: `A -> B -> C`, + // while with the magic forward_parent, it writes: `C <- B <- A`. + + // append `A` into parents. + forward_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + // append `A`'s forward_parent `B` + fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { + Event::Start { kind, forward_parent } => { + forward_parents.push(kind); + forward_parent + } + _ => unreachable!(), + }; + // append `B`'s forward_parent `C` in the next stage. + } + + for kind in forward_parents.drain(..).rev() { + self.start(kind); + } + } + Event::Finish => { + let is_last = i == self.events.len() - 1; + self.finish(is_last); + } + Event::Token { kind, n_raw_tokens } => { + self.eat_trivias(); + let n_raw_tokens = n_raw_tokens as usize; + let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] + .iter() + .map(|it| it.len) + .sum::(); + self.leaf(kind, len, n_raw_tokens); + } + Event::Error { msg } => self + .sink + .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), + } + } + self.sink + } + + /// Add the node into syntax tree but discard the comments/whitespaces. + fn start(&mut self, kind: SyntaxKind) { + if kind == SOURCE_FILE { + self.sink.start_branch(kind); + return; + } + let n_trivias = + self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); + let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; + let mut trivia_end = + self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); + + let n_attached_trivias = { + let leading_trivias = leading_trivias.iter().rev().map(|it| { + let next_end = trivia_end - it.len; + let range = TextRange::from_to(next_end, trivia_end); + trivia_end = next_end; + (it.kind, &self.text[range]) + }); + n_attached_trivias(kind, leading_trivias) + }; + self.eat_n_trivias(n_trivias - n_attached_trivias); + self.sink.start_branch(kind); + self.eat_n_trivias(n_attached_trivias); + } + + fn finish(&mut self, is_last: bool) { + if is_last { + self.eat_trivias() + } + self.sink.finish_branch(); + } + + fn eat_trivias(&mut self) { + while let Some(&token) = self.tokens.get(self.token_pos) { + if !token.kind.is_trivia() { + break; + } + self.leaf(token.kind, token.len, 1); + } + } + + fn eat_n_trivias(&mut self, n: usize) { + for _ in 0..n { + let token = self.tokens[self.token_pos]; + assert!(token.kind.is_trivia()); + self.leaf(token.kind, token.len, 1); + } + } + + fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { + let range = TextRange::offset_len(self.text_pos, len); + let text: SmolStr = self.text[range].into(); + self.text_pos += len; + self.token_pos += n_tokens; + self.sink.leaf(kind, text); + } +} + +fn n_attached_trivias<'a>( + kind: SyntaxKind, + trivias: impl Iterator, +) -> usize { + match kind { + CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF + | MODULE | NAMED_FIELD_DEF => { + let mut res = 0; + for (i, (kind, text)) in trivias.enumerate() { + match kind { + WHITESPACE => { + if text.contains("\n\n") { + break; + } + } + COMMENT => { + res = i + 1; + } + _ => (), + } + } + res + } + _ => 0, + } +} diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs new file mode 100644 index 000000000..0f1810df5 --- /dev/null +++ b/crates/ra_syntax/src/parsing/input.rs @@ -0,0 +1,72 @@ +use crate::{ + SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, + parsing::{ + TokenPos, TokenSource, + lexer::Token, + }, +}; + +impl<'t> TokenSource for ParserInput<'t> { + fn token_kind(&self, pos: TokenPos) -> SyntaxKind { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return EOF; + } + self.tokens[idx].kind + } + fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { + let idx_curr = pos.0 as usize; + let idx_next = pos.0 as usize + 1; + if !(idx_next < self.tokens.len()) { + return true; + } + self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] + } + fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return false; + } + let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); + + self.text[range] == *kw + } +} + +pub(crate) struct ParserInput<'t> { + text: &'t str, + /// start position of each token(expect whitespace and comment) + /// ```non-rust + /// struct Foo; + /// ^------^--- + /// | | ^- + /// 0 7 10 + /// ``` + /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` + start_offsets: Vec, + /// non-whitespace/comment tokens + /// ```non-rust + /// struct Foo {} + /// ^^^^^^ ^^^ ^^ + /// ``` + /// tokens: `[struct, Foo, {, }]` + tokens: Vec, +} + +impl<'t> ParserInput<'t> { + /// Generate input from tokens(expect comment and whitespace). + pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = 0.into(); + for &token in raw_tokens.iter() { + if !token.kind.is_trivia() { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + ParserInput { text, start_offsets, tokens } + } +} diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index 92d7895d3..99f6183a4 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -8,7 +8,7 @@ use crate::{ parsing::{ TokenSource, TokenPos, token_set::TokenSet, - parser_impl::event::Event, + event::Event, }, }; diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs deleted file mode 100644 index 9663fba35..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl/event.rs +++ /dev/null @@ -1,254 +0,0 @@ -//! This module provides a way to construct a `File`. -//! It is intended to be completely decoupled from the -//! parser, so as to allow to evolve the tree representation -//! and the parser algorithm independently. -//! -//! The `TreeSink` trait is the bridge between the parser and the -//! tree builder: the parser produces a stream of events like -//! `start node`, `finish node`, and `FileBuilder` converts -//! this stream to a real tree. -use std::mem; - -use crate::{ - SmolStr, - SyntaxKind::{self, *}, - TextRange, TextUnit, - syntax_error::{ - ParseError, - SyntaxError, - SyntaxErrorKind, - }, - parsing::{ - lexer::Token, - parser_impl::TreeSink, - }, -}; - -/// `Parser` produces a flat list of `Event`s. -/// They are converted to a tree-structure in -/// a separate pass, via `TreeBuilder`. -#[derive(Debug)] -pub(crate) enum Event { - /// This event signifies the start of the node. - /// It should be either abandoned (in which case the - /// `kind` is `TOMBSTONE`, and the event is ignored), - /// or completed via a `Finish` event. - /// - /// All tokens between a `Start` and a `Finish` would - /// become the children of the respective node. - /// - /// For left-recursive syntactic constructs, the parser produces - /// a child node before it sees a parent. `forward_parent` - /// saves the position of current event's parent. - /// - /// Consider this path - /// - /// foo::bar - /// - /// The events for it would look like this: - /// - /// - /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH - /// | /\ - /// | | - /// +------forward-parent------+ - /// - /// And the tree would look like this - /// - /// +--PATH---------+ - /// | | | - /// | | | - /// | '::' 'bar' - /// | - /// PATH - /// | - /// 'foo' - /// - /// See also `CompletedMarker::precede`. - Start { - kind: SyntaxKind, - forward_parent: Option, - }, - - /// Complete the previous `Start` event - Finish, - - /// Produce a single leaf-element. - /// `n_raw_tokens` is used to glue complex contextual tokens. - /// For example, lexer tokenizes `>>` as `>`, `>`, and - /// `n_raw_tokens = 2` is used to produced a single `>>`. - Token { - kind: SyntaxKind, - n_raw_tokens: u8, - }, - - Error { - msg: ParseError, - }, -} - -impl Event { - pub(crate) fn tombstone() -> Self { - Event::Start { kind: TOMBSTONE, forward_parent: None } - } -} - -pub(super) struct EventProcessor<'a, S: TreeSink> { - sink: S, - text_pos: TextUnit, - text: &'a str, - token_pos: usize, - tokens: &'a [Token], - events: &'a mut [Event], -} - -impl<'a, S: TreeSink> EventProcessor<'a, S> { - pub(super) fn new( - sink: S, - text: &'a str, - tokens: &'a [Token], - events: &'a mut [Event], - ) -> EventProcessor<'a, S> { - EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } - } - - /// Generate the syntax tree with the control of events. - pub(super) fn process(mut self) -> S { - let mut forward_parents = Vec::new(); - - for i in 0..self.events.len() { - match mem::replace(&mut self.events[i], Event::tombstone()) { - Event::Start { kind: TOMBSTONE, .. } => (), - - Event::Start { kind, forward_parent } => { - // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, - // in the normal control flow, the parent-child relation: `A -> B -> C`, - // while with the magic forward_parent, it writes: `C <- B <- A`. - - // append `A` into parents. - forward_parents.push(kind); - let mut idx = i; - let mut fp = forward_parent; - while let Some(fwd) = fp { - idx += fwd as usize; - // append `A`'s forward_parent `B` - fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { - Event::Start { kind, forward_parent } => { - forward_parents.push(kind); - forward_parent - } - _ => unreachable!(), - }; - // append `B`'s forward_parent `C` in the next stage. - } - - for kind in forward_parents.drain(..).rev() { - self.start(kind); - } - } - Event::Finish => { - let is_last = i == self.events.len() - 1; - self.finish(is_last); - } - Event::Token { kind, n_raw_tokens } => { - self.eat_trivias(); - let n_raw_tokens = n_raw_tokens as usize; - let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] - .iter() - .map(|it| it.len) - .sum::(); - self.leaf(kind, len, n_raw_tokens); - } - Event::Error { msg } => self - .sink - .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), - } - } - self.sink - } - - /// Add the node into syntax tree but discard the comments/whitespaces. - fn start(&mut self, kind: SyntaxKind) { - if kind == SOURCE_FILE { - self.sink.start_branch(kind); - return; - } - let n_trivias = - self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); - let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; - let mut trivia_end = - self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); - - let n_attached_trivias = { - let leading_trivias = leading_trivias.iter().rev().map(|it| { - let next_end = trivia_end - it.len; - let range = TextRange::from_to(next_end, trivia_end); - trivia_end = next_end; - (it.kind, &self.text[range]) - }); - n_attached_trivias(kind, leading_trivias) - }; - self.eat_n_trivias(n_trivias - n_attached_trivias); - self.sink.start_branch(kind); - self.eat_n_trivias(n_attached_trivias); - } - - fn finish(&mut self, is_last: bool) { - if is_last { - self.eat_trivias() - } - self.sink.finish_branch(); - } - - fn eat_trivias(&mut self) { - while let Some(&token) = self.tokens.get(self.token_pos) { - if !token.kind.is_trivia() { - break; - } - self.leaf(token.kind, token.len, 1); - } - } - - fn eat_n_trivias(&mut self, n: usize) { - for _ in 0..n { - let token = self.tokens[self.token_pos]; - assert!(token.kind.is_trivia()); - self.leaf(token.kind, token.len, 1); - } - } - - fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { - let range = TextRange::offset_len(self.text_pos, len); - let text: SmolStr = self.text[range].into(); - self.text_pos += len; - self.token_pos += n_tokens; - self.sink.leaf(kind, text); - } -} - -fn n_attached_trivias<'a>( - kind: SyntaxKind, - trivias: impl Iterator, -) -> usize { - match kind { - CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF - | MODULE | NAMED_FIELD_DEF => { - let mut res = 0; - for (i, (kind, text)) in trivias.enumerate() { - match kind { - WHITESPACE => { - if text.contains("\n\n") { - break; - } - } - COMMENT => { - res = i + 1; - } - _ => (), - } - } - res - } - _ => 0, - } -} diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs deleted file mode 100644 index 11b32b9ce..000000000 --- a/crates/ra_syntax/src/parsing/parser_impl/input.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::{ - SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, - parsing::{ - TokenPos, - parser_impl::TokenSource, - lexer::Token, - }, -}; - -impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: TokenPos) -> SyntaxKind { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return EOF; - } - self.tokens[idx].kind - } - fn is_token_joint_to_next(&self, pos: TokenPos) -> bool { - let idx_curr = pos.0 as usize; - let idx_next = pos.0 as usize + 1; - if !(idx_next < self.tokens.len()) { - return true; - } - self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next] - } - fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool { - let idx = pos.0 as usize; - if !(idx < self.tokens.len()) { - return false; - } - let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); - - self.text[range] == *kw - } -} - -pub(crate) struct ParserInput<'t> { - text: &'t str, - /// start position of each token(expect whitespace and comment) - /// ```non-rust - /// struct Foo; - /// ^------^--- - /// | | ^- - /// 0 7 10 - /// ``` - /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` - start_offsets: Vec, - /// non-whitespace/comment tokens - /// ```non-rust - /// struct Foo {} - /// ^^^^^^ ^^^ ^^ - /// ``` - /// tokens: `[struct, Foo, {, }]` - tokens: Vec, -} - -impl<'t> ParserInput<'t> { - /// Generate input from tokens(expect comment and whitespace). - pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { - let mut tokens = Vec::new(); - let mut start_offsets = Vec::new(); - let mut len = 0.into(); - for &token in raw_tokens.iter() { - if !token.kind.is_trivia() { - tokens.push(token); - start_offsets.push(len); - } - len += token.len; - } - - ParserInput { text, start_offsets, tokens } - } -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index edf3fa291..f45326dff 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -4,8 +4,7 @@ use crate::{ syntax_node::{GreenNode, SyntaxNode}, syntax_error::SyntaxError, parsing::{ - grammar, - parser_impl, + grammar, parse_with, builder::GreenBuilder, parser_api::Parser, lexer::{tokenize, Token}, @@ -62,8 +61,7 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = - parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser); + let (green, new_errors) = parse_with(GreenBuilder::new(), &text, &tokens, reparser); Some((node, green, new_errors)) } -- cgit v1.2.3