From 3517c175ac537b47dd3e36cc7fb1edd60b02c039 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 21:08:59 +0300
Subject: rename Sink -> TreeSink

---
 crates/ra_syntax/src/parsing/parser_impl/event.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/parser_impl')

diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs
index 2ddbdd34d..9663fba35 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/event.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs
@@ -3,7 +3,7 @@
 //! parser, so as to allow the tree representation
 //! and the parser algorithm to evolve independently.
 //!
-//! The `Sink` trait is the bridge between the parser and the
+//! The `TreeSink` trait is the bridge between the parser and the
 //! tree builder: the parser produces a stream of events like
 //! `start node`, `finish node`, and `FileBuilder` converts
 //! this stream to a real tree.
@@ -20,7 +20,7 @@ use crate::{
     },
     parsing::{
         lexer::Token,
-        parser_impl::Sink,
+        parser_impl::TreeSink,
     },
 };
 
@@ -93,7 +93,7 @@ impl Event {
     }
 }
 
-pub(super) struct EventProcessor<'a, S: Sink> {
+pub(super) struct EventProcessor<'a, S: TreeSink> {
     sink: S,
     text_pos: TextUnit,
     text: &'a str,
@@ -102,7 +102,7 @@ pub(super) struct EventProcessor<'a, S: Sink> {
     events: &'a mut [Event],
 }
 
-impl<'a, S: Sink> EventProcessor<'a, S> {
+impl<'a, S: TreeSink> EventProcessor<'a, S> {
     pub(super) fn new(
         sink: S,
         text: &'a str,
--
cgit v1.2.3


From 0c81b9deeed81bfb2cf8142af9d748317d5d71a1 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 21:50:07 +0300
Subject: route parsing via TokenSource trait

---
 crates/ra_syntax/src/parsing/parser_impl/input.rs | 69 ++++++++++-------------
 1 file changed, 31 insertions(+), 38 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/parser_impl')

diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
index 275d94918..8ebbd3825 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -1,10 +1,40 @@
 use crate::{
     SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
-    parsing::lexer::Token,
+    parsing::{
+        parser_impl::TokenSource,
+        lexer::Token,
+    },
 };
 
 use std::ops::{Add, AddAssign};
 
+impl<'t> TokenSource for ParserInput<'t> {
+    fn token_kind(&self, pos: InputPosition) -> SyntaxKind {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return EOF;
+        }
+        self.tokens[idx].kind
+    }
+    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool {
+        let idx_curr = pos.0 as usize;
+        let idx_next = pos.0 as usize;
+        if !(idx_next < self.tokens.len()) {
+            return true;
+        }
+        self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
+    }
+    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool {
+        let idx = pos.0 as usize;
+        if !(idx < self.tokens.len()) {
+            return false;
+        }
+        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
+
+        self.text[range] == *kw
+    }
+}
+
 pub(crate) struct ParserInput<'t> {
     text: &'t str,
     /// start position of each token (except whitespace and comments)
     /// ```non-rust
     /// struct Foo;
     /// ^------^---
     /// |      |  ^-
     /// 0      7  10
     /// ```
     /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
     start_offsets: Vec<TextUnit>,
     /// non-whitespace/comment tokens
     /// ```non-rust
     /// struct Foo {}
     /// ^^^^^^ ^^^ ^^
     /// ```
     /// tokens: `[struct, Foo, {, }]`
     tokens: Vec<Token>,
 }
 
 impl<'t> ParserInput<'t> {
     /// Generate input from tokens (excluding comments and whitespace).
     pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
         let mut tokens = Vec::new();
         let mut start_offsets = Vec::new();
         let mut len = 0.into();
         for &token in raw_tokens.iter() {
             if !token.kind.is_trivia() {
                 tokens.push(token);
                 start_offsets.push(len);
             }
             len += token.len;
         }
 
         ParserInput { text, start_offsets, tokens }
     }
-
-    /// Get the syntax kind of token at given input position.
-    pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return EOF;
-        }
-        self.tokens[idx].kind
-    }
-
-    /// Get the length of a token at given input position.
-    pub fn token_len(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.tokens[idx].len
-    }
-
-    /// Get the start position of a token at given input position.
-    pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return 0.into();
-        }
-        self.start_offsets[idx]
-    }
-
-    /// Get the raw text of a token at given input position.
-    pub fn token_text(&self, pos: InputPosition) -> &'t str {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return "";
-        }
-        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
-        &self.text[range]
-    }
 }
 
 #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
--
cgit v1.2.3


From 2b5e336ce7172914686b33c8ac1522911366fcf0 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 22:19:12 +0300
Subject: move abstract traits to top

---
 crates/ra_syntax/src/parsing/parser_impl/input.rs | 32 +++--------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/parser_impl')

diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
index 8ebbd3825..e9735e526 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -1,22 +1,21 @@
 use crate::{
     SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
     parsing::{
+        TokenPos,
         parser_impl::TokenSource,
         lexer::Token,
     },
 };
 
-use std::ops::{Add, AddAssign};
-
 impl<'t> TokenSource for ParserInput<'t> {
-    fn token_kind(&self, pos: InputPosition) -> SyntaxKind {
+    fn token_kind(&self, pos: TokenPos) -> SyntaxKind {
         let idx = pos.0 as usize;
         if !(idx < self.tokens.len()) {
             return EOF;
         }
         self.tokens[idx].kind
     }
-    fn is_token_joint_to_next(&self, pos: InputPosition) -> bool {
+    fn is_token_joint_to_next(&self, pos: TokenPos) -> bool {
         let idx_curr = pos.0 as usize;
         let idx_next = pos.0 as usize;
         if !(idx_next < self.tokens.len()) {
@@ -24,7 +23,7 @@ impl<'t> TokenSource for ParserInput<'t> {
         }
         self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
     }
-    fn is_keyword(&self, pos: InputPosition, kw: &str) -> bool {
+    fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool {
         let idx = pos.0 as usize;
         if !(idx < self.tokens.len()) {
             return false;
@@ -72,26 +71,3 @@ impl<'t> ParserInput<'t> {
         ParserInput { text, start_offsets, tokens }
     }
 }
-
-#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
-pub(crate) struct InputPosition(u32);
-
-impl InputPosition {
-    pub fn new() -> Self {
-        InputPosition(0)
-    }
-}
-
-impl Add<u32> for InputPosition {
-    type Output = InputPosition;
-
-    fn add(self, rhs: u32) -> InputPosition {
-        InputPosition(self.0 + rhs)
-    }
-}
-
-impl AddAssign<u32> for InputPosition {
-    fn add_assign(&mut self, rhs: u32) {
-        self.0 += rhs
-    }
-}
--
cgit v1.2.3


From e72ad0a2faac98972544dd42316ccf8090717102 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 22:27:49 +0300
Subject: fix off by one error

---
 crates/ra_syntax/src/parsing/parser_impl/input.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'crates/ra_syntax/src/parsing/parser_impl')

diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
index e9735e526..11b32b9ce 100644
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -17,7 +17,7 @@ impl<'t> TokenSource for ParserInput<'t> {
     }
     fn is_token_joint_to_next(&self, pos: TokenPos) -> bool {
         let idx_curr = pos.0 as usize;
-        let idx_next = pos.0 as usize;
+        let idx_next = pos.0 as usize + 1;
         if !(idx_next < self.tokens.len()) {
             return true;
         }
--
cgit v1.2.3


From cce23fddba4241202ebd29cce44db4ce9a08793a Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 20 Feb 2019 22:52:32 +0300
Subject: flatten module structure

---
 crates/ra_syntax/src/parsing/parser_impl/event.rs | 254 ----------------------
 crates/ra_syntax/src/parsing/parser_impl/input.rs |  73 -------
 2 files changed, 327 deletions(-)
 delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/event.rs
 delete mode 100644 crates/ra_syntax/src/parsing/parser_impl/input.rs

(limited to 'crates/ra_syntax/src/parsing/parser_impl')

diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs
deleted file mode 100644
index 9663fba35..000000000
--- a/crates/ra_syntax/src/parsing/parser_impl/event.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-//! This module provides a way to construct a `File`.
-//! It is intended to be completely decoupled from the
-//! parser, so as to allow the tree representation
-//! and the parser algorithm to evolve independently.
-//!
-//! The `TreeSink` trait is the bridge between the parser and the
-//! tree builder: the parser produces a stream of events like
-//! `start node`, `finish node`, and `FileBuilder` converts
-//! this stream to a real tree.
-use std::mem;
-
-use crate::{
-    SmolStr,
-    SyntaxKind::{self, *},
-    TextRange, TextUnit,
-    syntax_error::{
-        ParseError,
-        SyntaxError,
-        SyntaxErrorKind,
-    },
-    parsing::{
-        lexer::Token,
-        parser_impl::TreeSink,
-    },
-};
-
-/// `Parser` produces a flat list of `Event`s.
-/// They are converted to a tree-structure in
-/// a separate pass, via `TreeBuilder`.
-#[derive(Debug)]
-pub(crate) enum Event {
-    /// This event signifies the start of the node.
-    /// It should be either abandoned (in which case the
-    /// `kind` is `TOMBSTONE`, and the event is ignored),
-    /// or completed via a `Finish` event.
-    ///
-    /// All tokens between a `Start` and a `Finish` would
-    /// become the children of the respective node.
-    ///
-    /// For left-recursive syntactic constructs, the parser produces
-    /// a child node before it sees a parent. `forward_parent`
-    /// saves the position of current event's parent.
-    ///
-    /// Consider this path
-    ///
-    /// foo::bar
-    ///
-    /// The events for it would look like this:
-    ///
-    ///
-    /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
-    ///       |                          /\
-    ///       |                          |
-    ///       +------forward-parent------+
-    ///
-    /// And the tree would look like this
-    ///
-    ///    +--PATH---------+
-    ///    |   |           |
-    ///    |   |           |
-    ///    |  '::'       'bar'
-    ///    |
-    ///   PATH
-    ///    |
-    ///   'foo'
-    ///
-    /// See also `CompletedMarker::precede`.
-    Start {
-        kind: SyntaxKind,
-        forward_parent: Option<u32>,
-    },
-
-    /// Complete the previous `Start` event
-    Finish,
-
-    /// Produce a single leaf-element.
-    /// `n_raw_tokens` is used to glue complex contextual tokens.
-    /// For example, lexer tokenizes `>>` as `>`, `>`, and
-    /// `n_raw_tokens = 2` is used to produce a single `>>`.
-    Token {
-        kind: SyntaxKind,
-        n_raw_tokens: u8,
-    },
-
-    Error {
-        msg: ParseError,
-    },
-}
-
-impl Event {
-    pub(crate) fn tombstone() -> Self {
-        Event::Start { kind: TOMBSTONE, forward_parent: None }
-    }
-}
-
-pub(super) struct EventProcessor<'a, S: TreeSink> {
-    sink: S,
-    text_pos: TextUnit,
-    text: &'a str,
-    token_pos: usize,
-    tokens: &'a [Token],
-    events: &'a mut [Event],
-}
-
-impl<'a, S: TreeSink> EventProcessor<'a, S> {
-    pub(super) fn new(
-        sink: S,
-        text: &'a str,
-        tokens: &'a [Token],
-        events: &'a mut [Event],
-    ) -> EventProcessor<'a, S> {
-        EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
-    }
-
-    /// Generate the syntax tree by processing the events.
-    pub(super) fn process(mut self) -> S {
-        let mut forward_parents = Vec::new();
-
-        for i in 0..self.events.len() {
-            match mem::replace(&mut self.events[i], Event::tombstone()) {
-                Event::Start { kind: TOMBSTONE, .. } => (),
-
-                Event::Start { kind, forward_parent } => {
-                    // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
-                    // in the normal control flow, the parent-child relation: `A -> B -> C`,
-                    // while with the magic forward_parent, it writes: `C <- B <- A`.
-
-                    // append `A` into parents.
-                    forward_parents.push(kind);
-                    let mut idx = i;
-                    let mut fp = forward_parent;
-                    while let Some(fwd) = fp {
-                        idx += fwd as usize;
-                        // append `A`'s forward_parent `B`
-                        fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
-                            Event::Start { kind, forward_parent } => {
-                                forward_parents.push(kind);
-                                forward_parent
-                            }
-                            _ => unreachable!(),
-                        };
-                        // append `B`'s forward_parent `C` in the next stage.
-                    }
-
-                    for kind in forward_parents.drain(..).rev() {
-                        self.start(kind);
-                    }
-                }
-                Event::Finish => {
-                    let is_last = i == self.events.len() - 1;
-                    self.finish(is_last);
-                }
-                Event::Token { kind, n_raw_tokens } => {
-                    self.eat_trivias();
-                    let n_raw_tokens = n_raw_tokens as usize;
-                    let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
-                        .iter()
-                        .map(|it| it.len)
-                        .sum::<TextUnit>();
-                    self.leaf(kind, len, n_raw_tokens);
-                }
-                Event::Error { msg } => self
-                    .sink
-                    .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
-            }
-        }
-        self.sink
-    }
-
-    /// Add the node into the syntax tree, but discard the comments/whitespaces.
-    fn start(&mut self, kind: SyntaxKind) {
-        if kind == SOURCE_FILE {
-            self.sink.start_branch(kind);
-            return;
-        }
-        let n_trivias =
-            self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
-        let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
-        let mut trivia_end =
-            self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
-
-        let n_attached_trivias = {
-            let leading_trivias = leading_trivias.iter().rev().map(|it| {
-                let next_end = trivia_end - it.len;
-                let range = TextRange::from_to(next_end, trivia_end);
-                trivia_end = next_end;
-                (it.kind, &self.text[range])
-            });
-            n_attached_trivias(kind, leading_trivias)
-        };
-        self.eat_n_trivias(n_trivias - n_attached_trivias);
-        self.sink.start_branch(kind);
-        self.eat_n_trivias(n_attached_trivias);
-    }
-
-    fn finish(&mut self, is_last: bool) {
-        if is_last {
-            self.eat_trivias()
-        }
-        self.sink.finish_branch();
-    }
-
-    fn eat_trivias(&mut self) {
-        while let Some(&token) = self.tokens.get(self.token_pos) {
-            if !token.kind.is_trivia() {
-                break;
-            }
-            self.leaf(token.kind, token.len, 1);
-        }
-    }
-
-    fn eat_n_trivias(&mut self, n: usize) {
-        for _ in 0..n {
-            let token = self.tokens[self.token_pos];
-            assert!(token.kind.is_trivia());
-            self.leaf(token.kind, token.len, 1);
-        }
-    }
-
-    fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
-        let range = TextRange::offset_len(self.text_pos, len);
-        let text: SmolStr = self.text[range].into();
-        self.text_pos += len;
-        self.token_pos += n_tokens;
-        self.sink.leaf(kind, text);
-    }
-}
-
-fn n_attached_trivias<'a>(
-    kind: SyntaxKind,
-    trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
-) -> usize {
-    match kind {
-        CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
-        | MODULE | NAMED_FIELD_DEF => {
-            let mut res = 0;
-            for (i, (kind, text)) in trivias.enumerate() {
-                match kind {
-                    WHITESPACE => {
-                        if text.contains("\n\n") {
-                            break;
-                        }
-                    }
-                    COMMENT => {
-                        res = i + 1;
-                    }
-                    _ => (),
-                }
-            }
-            res
-        }
-        _ => 0,
-    }
-}
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
deleted file mode 100644
index 11b32b9ce..000000000
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-use crate::{
-    SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
-    parsing::{
-        TokenPos,
-        parser_impl::TokenSource,
-        lexer::Token,
-    },
-};
-
-impl<'t> TokenSource for ParserInput<'t> {
-    fn token_kind(&self, pos: TokenPos) -> SyntaxKind {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return EOF;
-        }
-        self.tokens[idx].kind
-    }
-    fn is_token_joint_to_next(&self, pos: TokenPos) -> bool {
-        let idx_curr = pos.0 as usize;
-        let idx_next = pos.0 as usize + 1;
-        if !(idx_next < self.tokens.len()) {
-            return true;
-        }
-        self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
-    }
-    fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool {
-        let idx = pos.0 as usize;
-        if !(idx < self.tokens.len()) {
-            return false;
-        }
-        let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
-
-        self.text[range] == *kw
-    }
-}
-
-pub(crate) struct ParserInput<'t> {
-    text: &'t str,
-    /// start position of each token (except whitespace and comments)
-    /// ```non-rust
-    /// struct Foo;
-    /// ^------^---
-    /// |      |  ^-
-    /// 0      7  10
-    /// ```
-    /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
-    start_offsets: Vec<TextUnit>,
-    /// non-whitespace/comment tokens
-    /// ```non-rust
-    /// struct Foo {}
-    /// ^^^^^^ ^^^ ^^
-    /// ```
-    /// tokens: `[struct, Foo, {, }]`
-    tokens: Vec<Token>,
-}
-
-impl<'t> ParserInput<'t> {
-    /// Generate input from tokens (excluding comments and whitespace).
-    pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
-        let mut tokens = Vec::new();
-        let mut start_offsets = Vec::new();
-        let mut len = 0.into();
-        for &token in raw_tokens.iter() {
-            if !token.kind.is_trivia() {
-                tokens.push(token);
-                start_offsets.push(len);
-            }
-            len += token.len;
-        }
-
-        ParserInput { text, start_offsets, tokens }
-    }
-}
--
cgit v1.2.3
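Taken together, the commits above replace `ParserInput`'s ad-hoc inherent methods with two narrow abstractions: `TokenSource`, the parser's read-only view of the lexed tokens, and `TreeSink`, the tree builder's view of the parser's output. The trait definitions themselves live in `parser_impl/mod.rs`, which is outside this diff, so the following is only a minimal, self-contained sketch of the `TokenSource` half as inferred from the implementation above; the toy `SyntaxKind` variants and the plain `usize` offsets (standing in for `TextUnit`) are illustrative assumptions.

```rust
// Sketch of TokenSource as inferred from the ParserInput impl above.
// Assumptions: toy SyntaxKind, usize offsets instead of TextUnit.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum SyntaxKind {
    StructKw,
    Ident,
    Semi,
    Whitespace,
    Eof,
}

impl SyntaxKind {
    fn is_trivia(self) -> bool {
        self == SyntaxKind::Whitespace
    }
}

#[derive(Clone, Copy)]
struct Token {
    kind: SyntaxKind,
    len: usize,
}

#[derive(Clone, Copy)]
struct TokenPos(u32);

trait TokenSource {
    fn token_kind(&self, pos: TokenPos) -> SyntaxKind;
    fn is_token_joint_to_next(&self, pos: TokenPos) -> bool;
    fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool;
}

struct ParserInput<'t> {
    text: &'t str,
    start_offsets: Vec<usize>, // start offset of each non-trivia token
    tokens: Vec<Token>,        // non-trivia tokens only
}

impl<'t> ParserInput<'t> {
    /// Filter out whitespace/comments, remembering where each remaining
    /// token starts in the original text.
    fn new(text: &'t str, raw_tokens: &[Token]) -> ParserInput<'t> {
        let mut tokens = Vec::new();
        let mut start_offsets = Vec::new();
        let mut len = 0;
        for &token in raw_tokens {
            if !token.kind.is_trivia() {
                tokens.push(token);
                start_offsets.push(len);
            }
            len += token.len;
        }
        ParserInput { text, start_offsets, tokens }
    }
}

impl<'t> TokenSource for ParserInput<'t> {
    fn token_kind(&self, pos: TokenPos) -> SyntaxKind {
        self.tokens.get(pos.0 as usize).map_or(SyntaxKind::Eof, |t| t.kind)
    }

    // Two tokens are "joint" when no trivia separates them. Note the `+ 1`:
    // commit e72ad0a above exists precisely because the first version
    // compared a token's start offset against its own.
    fn is_token_joint_to_next(&self, pos: TokenPos) -> bool {
        let (curr, next) = (pos.0 as usize, pos.0 as usize + 1);
        if next >= self.tokens.len() {
            return true;
        }
        self.start_offsets[curr] + self.tokens[curr].len == self.start_offsets[next]
    }

    fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool {
        let idx = pos.0 as usize;
        if idx >= self.tokens.len() {
            return false;
        }
        let start = self.start_offsets[idx];
        &self.text[start..start + self.tokens[idx].len] == kw
    }
}

fn main() {
    // "struct Foo;" lexes as [struct, whitespace, Foo, ;]; trivia is dropped.
    let raw = [
        Token { kind: SyntaxKind::StructKw, len: 6 },
        Token { kind: SyntaxKind::Whitespace, len: 1 },
        Token { kind: SyntaxKind::Ident, len: 3 },
        Token { kind: SyntaxKind::Semi, len: 1 },
    ];
    let input = ParserInput::new("struct Foo;", &raw);
    assert!(input.is_keyword(TokenPos(0), "struct"));
    assert!(!input.is_token_joint_to_next(TokenPos(0))); // whitespace gap
    assert!(input.is_token_joint_to_next(TokenPos(1))); // `Foo` abuts `;`
    assert_eq!(input.token_kind(TokenPos(9)), SyntaxKind::Eof); // out of range
}
```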
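The `forward_parent` machinery documented in the deleted `event.rs` is the subtlest part of this pipeline, so a worked example may help. The sketch below is a hypothetical, standalone reproduction of the resolution loop from `EventProcessor::process`, with string kinds standing in for `SyntaxKind` and `println!` standing in for a real `TreeSink`; only the tombstone/forward-parent logic mirrors the original.

```rust
use std::mem;

// Toy stand-ins: string kinds instead of SyntaxKind, printing instead of
// a real TreeSink. Only the forward_parent resolution is faithful.
enum Event {
    Start { kind: &'static str, forward_parent: Option<u32> },
    Finish,
    Token { text: &'static str },
}

const TOMBSTONE: &str = "TOMBSTONE";

fn tombstone() -> Event {
    Event::Start { kind: TOMBSTONE, forward_parent: None }
}

fn process(events: &mut [Event]) {
    let mut forward_parents = Vec::new();
    for i in 0..events.len() {
        match mem::replace(&mut events[i], tombstone()) {
            // An abandoned or already-consumed start: ignore it.
            Event::Start { kind: TOMBSTONE, .. } => (),
            Event::Start { kind, forward_parent } => {
                // Walk the forward_parent chain, collecting kinds child-first...
                forward_parents.push(kind);
                let mut idx = i;
                let mut fp = forward_parent;
                while let Some(fwd) = fp {
                    idx += fwd as usize;
                    fp = match mem::replace(&mut events[idx], tombstone()) {
                        Event::Start { kind, forward_parent } => {
                            forward_parents.push(kind);
                            forward_parent
                        }
                        _ => unreachable!(),
                    };
                }
                // ...then start the nodes outermost-first.
                for kind in forward_parents.drain(..).rev() {
                    println!("start {kind}");
                }
            }
            Event::Finish => println!("finish"),
            Event::Token { text } => println!("token {text}"),
        }
    }
}

fn main() {
    // Events for `foo::bar`: the outer PATH is emitted *after* its first
    // child, and linked back via forward_parent (a distance in the buffer).
    let mut events = vec![
        Event::Start { kind: "PATH", forward_parent: Some(3) },
        Event::Token { text: "foo" },
        Event::Finish,
        Event::Start { kind: "PATH", forward_parent: None },
        Event::Token { text: "::" },
        Event::Token { text: "bar" },
        Event::Finish,
    ];
    process(&mut events);
    // Prints: start PATH, start PATH, token foo, finish, token ::,
    // token bar, finish — i.e. the tree PATH(PATH(foo) :: bar).
}
```

Run on the `foo::bar` event stream from the doc comment, this starts the outer `PATH` before the inner one even though the parser emitted the inner `PATH`'s `Start` first, which is exactly what the `forward_parent` back-link buys.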