From 79ce0fa8d753ea00b5a98c7835be2f7dec7704f9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 21 Feb 2019 12:03:42 +0300 Subject: move whitespace handling to tree builder --- crates/ra_syntax/src/parsing.rs | 22 ++-- crates/ra_syntax/src/parsing/builder.rs | 127 ++++++++++++++++---- crates/ra_syntax/src/parsing/event.rs | 193 ++++++------------------------ crates/ra_syntax/src/parsing/reparsing.rs | 5 +- 4 files changed, 159 insertions(+), 188 deletions(-) diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 138d1394a..7e1b32035 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -9,11 +9,11 @@ mod grammar; mod reparsing; use crate::{ - SyntaxKind, SmolStr, SyntaxError, + SyntaxKind, SyntaxError, parsing::{ - builder::GreenBuilder, + builder::TreeBuilder, input::ParserInput, - event::EventProcessor, + event::process, parser::Parser, }, syntax_node::GreenNode, @@ -28,22 +28,24 @@ pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); - parse_with(GreenBuilder::default(), text, &tokens, grammar::root) + let tree_sink = TreeBuilder::new(text, &tokens); + parse_with(tree_sink, text, &tokens, grammar::root) } fn parse_with( - tree_sink: S, + mut tree_sink: S, text: &str, tokens: &[Token], f: fn(&mut Parser), ) -> S::Tree { - let mut events = { + let events = { let input = ParserInput::new(text, &tokens); let mut p = Parser::new(&input); f(&mut p); p.finish() }; - EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() + process(&mut tree_sink, events); + tree_sink.finish() } /// `TreeSink` abstracts details of a particular syntax tree implementation. @@ -51,14 +53,14 @@ trait TreeSink { type Tree; /// Adds new leaf to the current branch. - fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); + fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8); /// Start new branch and make it current. - fn start_branch(&mut self, kind: SyntaxKind); + fn start_branch(&mut self, kind: SyntaxKind, root: bool); /// Finish current branch and restore previous /// branch as current. - fn finish_branch(&mut self); + fn finish_branch(&mut self, root: bool); fn error(&mut self, error: ParseError); diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index ee0e2cce7..1041c6a7b 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,40 +1,63 @@ use crate::{ - SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, - parsing::{TreeSink, ParseError}, + SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, + SyntaxKind::{self, *}, + parsing::{TreeSink, ParseError, Token}, syntax_node::{GreenNode, RaTypes}, }; use rowan::GreenNodeBuilder; -pub(crate) struct GreenBuilder { +pub(crate) struct TreeBuilder<'a> { + text: &'a str, + tokens: &'a [Token], text_pos: TextUnit, + token_pos: usize, errors: Vec, inner: GreenNodeBuilder, } -impl Default for GreenBuilder { - fn default() -> GreenBuilder { - GreenBuilder { - text_pos: TextUnit::default(), - errors: Vec::new(), - inner: GreenNodeBuilder::new(), - } - } -} - -impl TreeSink for GreenBuilder { +impl<'a> TreeSink for TreeBuilder<'a> { type Tree = (GreenNode, Vec); - fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { - self.text_pos += TextUnit::of_str(text.as_str()); - self.inner.leaf(kind, text); + fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { + self.eat_trivias(); + let n_tokens = n_tokens as usize; + let len = self.tokens[self.token_pos..self.token_pos + n_tokens] + .iter() + .map(|it| it.len) + .sum::(); + self.do_leaf(kind, len, n_tokens); } - fn start_branch(&mut self, kind: SyntaxKind) { - self.inner.start_internal(kind) + fn start_branch(&mut self, kind: SyntaxKind, root: bool) { + if root { + self.inner.start_internal(kind); + return; + } + let n_trivias = + self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); + let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; + let mut trivia_end = + self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); + + let n_attached_trivias = { + let leading_trivias = leading_trivias.iter().rev().map(|it| { + let next_end = trivia_end - it.len; + let range = TextRange::from_to(next_end, trivia_end); + trivia_end = next_end; + (it.kind, &self.text[range]) + }); + n_attached_trivias(kind, leading_trivias) + }; + self.eat_n_trivias(n_trivias - n_attached_trivias); + self.inner.start_internal(kind); + self.eat_n_trivias(n_attached_trivias); } - fn finish_branch(&mut self) { + fn finish_branch(&mut self, root: bool) { + if root { + self.eat_trivias() + } self.inner.finish_internal(); } @@ -47,3 +70,67 @@ impl TreeSink for GreenBuilder { (self.inner.finish(), self.errors) } } + +impl<'a> TreeBuilder<'a> { + pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TreeBuilder<'a> { + TreeBuilder { + text, + tokens, + text_pos: 0.into(), + token_pos: 0, + errors: Vec::new(), + inner: GreenNodeBuilder::new(), + } + } + fn eat_trivias(&mut self) { + while let Some(&token) = self.tokens.get(self.token_pos) { + if !token.kind.is_trivia() { + break; + } + self.do_leaf(token.kind, token.len, 1); + } + } + + fn eat_n_trivias(&mut self, n: usize) { + for _ in 0..n { + let token = self.tokens[self.token_pos]; + assert!(token.kind.is_trivia()); + self.do_leaf(token.kind, token.len, 1); + } + } + + fn do_leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { + let range = TextRange::offset_len(self.text_pos, len); + let text: SmolStr = self.text[range].into(); + self.text_pos += len; + self.token_pos += n_tokens; + self.inner.leaf(kind, text); + } +} + +fn n_attached_trivias<'a>( + kind: SyntaxKind, + trivias: impl Iterator, +) -> usize { + match kind { + CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF + | MODULE | NAMED_FIELD_DEF => { + let mut res = 0; + for (i, (kind, text)) in trivias.enumerate() { + match kind { + WHITESPACE => { + if text.contains("\n\n") { + break; + } + } + COMMENT => { + res = i + 1; + } + _ => (), + } + } + res + } + _ => 0, + } +} diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs index f6f020eab..d6cbdffe0 100644 --- a/crates/ra_syntax/src/parsing/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs @@ -10,13 +10,8 @@ use std::mem; use crate::{ - SmolStr, SyntaxKind::{self, *}, - TextRange, TextUnit, - parsing::{ - ParseError, TreeSink, - lexer::Token, - }, + parsing::{ParseError, TreeSink}, }; /// `Parser` produces a flat list of `Event`s. @@ -88,160 +83,46 @@ impl Event { } } -pub(super) struct EventProcessor<'a, S: TreeSink> { - sink: S, - text_pos: TextUnit, - text: &'a str, - token_pos: usize, - tokens: &'a [Token], - events: &'a mut [Event], -} - -impl<'a, S: TreeSink> EventProcessor<'a, S> { - pub(super) fn new( - sink: S, - text: &'a str, - tokens: &'a [Token], - events: &'a mut [Event], - ) -> EventProcessor<'a, S> { - EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } - } - - /// Generate the syntax tree with the control of events. - pub(crate) fn process(mut self) -> S { - let mut forward_parents = Vec::new(); - - for i in 0..self.events.len() { - match mem::replace(&mut self.events[i], Event::tombstone()) { - Event::Start { kind: TOMBSTONE, .. } => (), - - Event::Start { kind, forward_parent } => { - // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, - // in the normal control flow, the parent-child relation: `A -> B -> C`, - // while with the magic forward_parent, it writes: `C <- B <- A`. - - // append `A` into parents. - forward_parents.push(kind); - let mut idx = i; - let mut fp = forward_parent; - while let Some(fwd) = fp { - idx += fwd as usize; - // append `A`'s forward_parent `B` - fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { - Event::Start { kind, forward_parent } => { - forward_parents.push(kind); - forward_parent - } - _ => unreachable!(), - }; - // append `B`'s forward_parent `C` in the next stage. - } - - for kind in forward_parents.drain(..).rev() { - self.start(kind); - } - } - Event::Finish => { - let is_last = i == self.events.len() - 1; - self.finish(is_last); - } - Event::Token { kind, n_raw_tokens } => { - self.eat_trivias(); - let n_raw_tokens = n_raw_tokens as usize; - let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] - .iter() - .map(|it| it.len) - .sum::(); - self.leaf(kind, len, n_raw_tokens); +/// Generate the syntax tree with the control of events. +pub(super) fn process(sink: &mut impl TreeSink, mut events: Vec) { + let mut forward_parents = Vec::new(); + + for i in 0..events.len() { + match mem::replace(&mut events[i], Event::tombstone()) { + Event::Start { kind: TOMBSTONE, .. } => (), + + Event::Start { kind, forward_parent } => { + // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, + // in the normal control flow, the parent-child relation: `A -> B -> C`, + // while with the magic forward_parent, it writes: `C <- B <- A`. + + // append `A` into parents. + forward_parents.push(kind); + let mut idx = i; + let mut fp = forward_parent; + while let Some(fwd) = fp { + idx += fwd as usize; + // append `A`'s forward_parent `B` + fp = match mem::replace(&mut events[idx], Event::tombstone()) { + Event::Start { kind, forward_parent } => { + forward_parents.push(kind); + forward_parent + } + _ => unreachable!(), + }; + // append `B`'s forward_parent `C` in the next stage. } - Event::Error { msg } => self.sink.error(msg), - } - } - self.sink - } - - /// Add the node into syntax tree but discard the comments/whitespaces. - fn start(&mut self, kind: SyntaxKind) { - if kind == SOURCE_FILE { - self.sink.start_branch(kind); - return; - } - let n_trivias = - self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); - let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; - let mut trivia_end = - self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); - let n_attached_trivias = { - let leading_trivias = leading_trivias.iter().rev().map(|it| { - let next_end = trivia_end - it.len; - let range = TextRange::from_to(next_end, trivia_end); - trivia_end = next_end; - (it.kind, &self.text[range]) - }); - n_attached_trivias(kind, leading_trivias) - }; - self.eat_n_trivias(n_trivias - n_attached_trivias); - self.sink.start_branch(kind); - self.eat_n_trivias(n_attached_trivias); - } - - fn finish(&mut self, is_last: bool) { - if is_last { - self.eat_trivias() - } - self.sink.finish_branch(); - } - - fn eat_trivias(&mut self) { - while let Some(&token) = self.tokens.get(self.token_pos) { - if !token.kind.is_trivia() { - break; - } - self.leaf(token.kind, token.len, 1); - } - } - - fn eat_n_trivias(&mut self, n: usize) { - for _ in 0..n { - let token = self.tokens[self.token_pos]; - assert!(token.kind.is_trivia()); - self.leaf(token.kind, token.len, 1); - } - } - - fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { - let range = TextRange::offset_len(self.text_pos, len); - let text: SmolStr = self.text[range].into(); - self.text_pos += len; - self.token_pos += n_tokens; - self.sink.leaf(kind, text); - } -} - -fn n_attached_trivias<'a>( - kind: SyntaxKind, - trivias: impl Iterator, -) -> usize { - match kind { - CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF - | MODULE | NAMED_FIELD_DEF => { - let mut res = 0; - for (i, (kind, text)) in trivias.enumerate() { - match kind { - WHITESPACE => { - if text.contains("\n\n") { - break; - } - } - COMMENT => { - res = i + 1; - } - _ => (), + for (j, kind) in forward_parents.drain(..).rev().enumerate() { + let is_root_node = i == 0 && j == 0; + sink.start_branch(kind, is_root_node); } } - res + Event::Finish => sink.finish_branch(i == events.len() - 1), + Event::Token { kind, n_raw_tokens } => { + sink.leaf(kind, n_raw_tokens); + } + Event::Error { msg } => sink.error(msg), } - _ => 0, } } diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index f2d218ab9..f4c2251d7 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -5,7 +5,7 @@ use crate::{ syntax_error::SyntaxError, parsing::{ grammar, parse_with, - builder::GreenBuilder, + builder::TreeBuilder, parser::Parser, lexer::{tokenize, Token}, } @@ -61,7 +61,8 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); + let tree_sink = TreeBuilder::new(&text, &tokens); + let (green, new_errors) = parse_with(tree_sink, &text, &tokens, reparser); Some((node, green, new_errors)) } -- cgit v1.2.3