From 0c1cb981820c55127c3c09d93868814a1df98246 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 16:07:29 +0300 Subject: rename --- crates/ra_syntax/src/parsing.rs | 12 +- crates/ra_syntax/src/parsing/builder.rs | 170 ---------------------- crates/ra_syntax/src/parsing/input.rs | 67 --------- crates/ra_syntax/src/parsing/reparsing.rs | 8 +- crates/ra_syntax/src/parsing/text_token_source.rs | 67 +++++++++ crates/ra_syntax/src/parsing/text_tree_sink.rs | 170 ++++++++++++++++++++++ 6 files changed, 245 insertions(+), 249 deletions(-) delete mode 100644 crates/ra_syntax/src/parsing/builder.rs delete mode 100644 crates/ra_syntax/src/parsing/input.rs create mode 100644 crates/ra_syntax/src/parsing/text_token_source.rs create mode 100644 crates/ra_syntax/src/parsing/text_tree_sink.rs diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index cf573801c..ad5668a65 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -2,17 +2,13 @@ //! incremental reparsing. 
mod lexer; -mod input; -mod builder; +mod text_token_source; +mod text_tree_sink; mod reparsing; use crate::{ SyntaxError, syntax_node::GreenNode, - parsing::{ - builder::TreeBuilder, - input::ParserInput, - }, }; pub use self::lexer::{tokenize, Token}; @@ -21,8 +17,8 @@ pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); - let token_source = ParserInput::new(text, &tokens); - let mut tree_sink = TreeBuilder::new(text, &tokens); + let token_source = text_token_source::TextTokenSource::new(text, &tokens); + let mut tree_sink = text_tree_sink::TextTreeSink::new(text, &tokens); ra_parser::parse(&token_source, &mut tree_sink); tree_sink.finish() } diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs deleted file mode 100644 index cfe3139b8..000000000 --- a/crates/ra_syntax/src/parsing/builder.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::mem; - -use ra_parser::{TreeSink, ParseError}; -use rowan::GreenNodeBuilder; - -use crate::{ - SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, - SyntaxKind::{self, *}, - parsing::Token, - syntax_node::{GreenNode, RaTypes}, -}; - -/// Bridges the parser with our specific syntax tree representation. -/// -/// `TreeBuilder` also handles attachment of trivia (whitespace) to nodes. 
-pub(crate) struct TreeBuilder<'a> { - text: &'a str, - tokens: &'a [Token], - text_pos: TextUnit, - token_pos: usize, - state: State, - errors: Vec, - inner: GreenNodeBuilder, -} - -enum State { - PendingStart, - Normal, - PendingFinish, -} - -impl<'a> TreeSink for TreeBuilder<'a> { - fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { - match mem::replace(&mut self.state, State::Normal) { - State::PendingStart => unreachable!(), - State::PendingFinish => self.inner.finish_internal(), - State::Normal => (), - } - self.eat_trivias(); - let n_tokens = n_tokens as usize; - let len = self.tokens[self.token_pos..self.token_pos + n_tokens] - .iter() - .map(|it| it.len) - .sum::(); - self.do_leaf(kind, len, n_tokens); - } - - fn start_branch(&mut self, kind: SyntaxKind) { - match mem::replace(&mut self.state, State::Normal) { - State::PendingStart => { - self.inner.start_internal(kind); - // No need to attach trivias to previous node: there is no - // previous node. - return; - } - State::PendingFinish => self.inner.finish_internal(), - State::Normal => (), - } - - let n_trivias = - self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); - let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; - let mut trivia_end = - self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); - - let n_attached_trivias = { - let leading_trivias = leading_trivias.iter().rev().map(|it| { - let next_end = trivia_end - it.len; - let range = TextRange::from_to(next_end, trivia_end); - trivia_end = next_end; - (it.kind, &self.text[range]) - }); - n_attached_trivias(kind, leading_trivias) - }; - self.eat_n_trivias(n_trivias - n_attached_trivias); - self.inner.start_internal(kind); - self.eat_n_trivias(n_attached_trivias); - } - - fn finish_branch(&mut self) { - match mem::replace(&mut self.state, State::PendingFinish) { - State::PendingStart => unreachable!(), - State::PendingFinish => self.inner.finish_internal(), - State::Normal => 
(), - } - } - - fn error(&mut self, error: ParseError) { - let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); - self.errors.push(error) - } -} - -impl<'a> TreeBuilder<'a> { - pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TreeBuilder<'a> { - TreeBuilder { - text, - tokens, - text_pos: 0.into(), - token_pos: 0, - state: State::PendingStart, - errors: Vec::new(), - inner: GreenNodeBuilder::new(), - } - } - - pub(super) fn finish(mut self) -> (GreenNode, Vec) { - match mem::replace(&mut self.state, State::Normal) { - State::PendingFinish => { - self.eat_trivias(); - self.inner.finish_internal() - } - State::PendingStart | State::Normal => unreachable!(), - } - - (self.inner.finish(), self.errors) - } - - fn eat_trivias(&mut self) { - while let Some(&token) = self.tokens.get(self.token_pos) { - if !token.kind.is_trivia() { - break; - } - self.do_leaf(token.kind, token.len, 1); - } - } - - fn eat_n_trivias(&mut self, n: usize) { - for _ in 0..n { - let token = self.tokens[self.token_pos]; - assert!(token.kind.is_trivia()); - self.do_leaf(token.kind, token.len, 1); - } - } - - fn do_leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { - let range = TextRange::offset_len(self.text_pos, len); - let text: SmolStr = self.text[range].into(); - self.text_pos += len; - self.token_pos += n_tokens; - self.inner.leaf(kind, text); - } -} - -fn n_attached_trivias<'a>( - kind: SyntaxKind, - trivias: impl Iterator, -) -> usize { - match kind { - CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF - | MODULE | NAMED_FIELD_DEF => { - let mut res = 0; - for (i, (kind, text)) in trivias.enumerate() { - match kind { - WHITESPACE => { - if text.contains("\n\n") { - break; - } - } - COMMENT => { - res = i + 1; - } - _ => (), - } - } - res - } - _ => 0, - } -} diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs deleted file mode 100644 index 31c6a3b9b..000000000 --- 
a/crates/ra_syntax/src/parsing/input.rs +++ /dev/null @@ -1,67 +0,0 @@ -use ra_parser::TokenSource; - -use crate::{ - SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, - parsing::lexer::Token, -}; - -pub(crate) struct ParserInput<'t> { - text: &'t str, - /// start position of each token(expect whitespace and comment) - /// ```non-rust - /// struct Foo; - /// ^------^--- - /// | | ^- - /// 0 7 10 - /// ``` - /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` - start_offsets: Vec, - /// non-whitespace/comment tokens - /// ```non-rust - /// struct Foo {} - /// ^^^^^^ ^^^ ^^ - /// ``` - /// tokens: `[struct, Foo, {, }]` - tokens: Vec, -} - -impl<'t> TokenSource for ParserInput<'t> { - fn token_kind(&self, pos: usize) -> SyntaxKind { - if !(pos < self.tokens.len()) { - return EOF; - } - self.tokens[pos].kind - } - fn is_token_joint_to_next(&self, pos: usize) -> bool { - if !(pos + 1 < self.tokens.len()) { - return true; - } - self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1] - } - fn is_keyword(&self, pos: usize, kw: &str) -> bool { - if !(pos < self.tokens.len()) { - return false; - } - let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len); - - self.text[range] == *kw - } -} - -impl<'t> ParserInput<'t> { - /// Generate input from tokens(expect comment and whitespace). 
- pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { - let mut tokens = Vec::new(); - let mut start_offsets = Vec::new(); - let mut len = 0.into(); - for &token in raw_tokens.iter() { - if !token.kind.is_trivia() { - tokens.push(token); - start_offsets.push(len); - } - len += token.len; - } - - ParserInput { text, start_offsets, tokens } - } -} diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 19d8adcfb..ba77a3b6c 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -14,8 +14,8 @@ use crate::{ algo, syntax_node::{GreenNode, SyntaxNode}, parsing::{ - input::ParserInput, - builder::TreeBuilder, + text_token_source::TextTokenSource, + text_tree_sink::TextTreeSink, lexer::{tokenize, Token}, } }; @@ -68,8 +68,8 @@ fn reparse_block<'node>( if !is_balanced(&tokens) { return None; } - let token_source = ParserInput::new(&text, &tokens); - let mut tree_sink = TreeBuilder::new(&text, &tokens); + let token_source = TextTokenSource::new(&text, &tokens); + let mut tree_sink = TextTreeSink::new(&text, &tokens); reparser.parse(&token_source, &mut tree_sink); let (green, new_errors) = tree_sink.finish(); Some((node, green, new_errors)) diff --git a/crates/ra_syntax/src/parsing/text_token_source.rs b/crates/ra_syntax/src/parsing/text_token_source.rs new file mode 100644 index 000000000..a6277f66f --- /dev/null +++ b/crates/ra_syntax/src/parsing/text_token_source.rs @@ -0,0 +1,67 @@ +use ra_parser::TokenSource; + +use crate::{ + SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, + parsing::lexer::Token, +}; + +pub(crate) struct TextTokenSource<'t> { + text: &'t str, + /// start position of each token(expect whitespace and comment) + /// ```non-rust + /// struct Foo; + /// ^------^--- + /// | | ^- + /// 0 7 10 + /// ``` + /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` + start_offsets: Vec, + /// non-whitespace/comment tokens + /// ```non-rust 
+ /// struct Foo {} + /// ^^^^^^ ^^^ ^^ + /// ``` + /// tokens: `[struct, Foo, {, }]` + tokens: Vec, +} + +impl<'t> TokenSource for TextTokenSource<'t> { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if !(pos < self.tokens.len()) { + return EOF; + } + self.tokens[pos].kind + } + fn is_token_joint_to_next(&self, pos: usize) -> bool { + if !(pos + 1 < self.tokens.len()) { + return true; + } + self.start_offsets[pos] + self.tokens[pos].len == self.start_offsets[pos + 1] + } + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + if !(pos < self.tokens.len()) { + return false; + } + let range = TextRange::offset_len(self.start_offsets[pos], self.tokens[pos].len); + + self.text[range] == *kw + } +} + +impl<'t> TextTokenSource<'t> { + /// Generate input from tokens(expect comment and whitespace). + pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> TextTokenSource<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = 0.into(); + for &token in raw_tokens.iter() { + if !token.kind.is_trivia() { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + TextTokenSource { text, start_offsets, tokens } + } +} diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs new file mode 100644 index 000000000..8c1d78deb --- /dev/null +++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs @@ -0,0 +1,170 @@ +use std::mem; + +use ra_parser::{TreeSink, ParseError}; +use rowan::GreenNodeBuilder; + +use crate::{ + SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, + SyntaxKind::{self, *}, + parsing::Token, + syntax_node::{GreenNode, RaTypes}, +}; + +/// Bridges the parser with our specific syntax tree representation. +/// +/// `TextTreeSink` also handles attachment of trivia (whitespace) to nodes. 
+pub(crate) struct TextTreeSink<'a> { + text: &'a str, + tokens: &'a [Token], + text_pos: TextUnit, + token_pos: usize, + state: State, + errors: Vec, + inner: GreenNodeBuilder, +} + +enum State { + PendingStart, + Normal, + PendingFinish, +} + +impl<'a> TreeSink for TextTreeSink<'a> { + fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingStart => unreachable!(), + State::PendingFinish => self.inner.finish_internal(), + State::Normal => (), + } + self.eat_trivias(); + let n_tokens = n_tokens as usize; + let len = self.tokens[self.token_pos..self.token_pos + n_tokens] + .iter() + .map(|it| it.len) + .sum::(); + self.do_leaf(kind, len, n_tokens); + } + + fn start_branch(&mut self, kind: SyntaxKind) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingStart => { + self.inner.start_internal(kind); + // No need to attach trivias to previous node: there is no + // previous node. + return; + } + State::PendingFinish => self.inner.finish_internal(), + State::Normal => (), + } + + let n_trivias = + self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); + let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; + let mut trivia_end = + self.text_pos + leading_trivias.iter().map(|it| it.len).sum::(); + + let n_attached_trivias = { + let leading_trivias = leading_trivias.iter().rev().map(|it| { + let next_end = trivia_end - it.len; + let range = TextRange::from_to(next_end, trivia_end); + trivia_end = next_end; + (it.kind, &self.text[range]) + }); + n_attached_trivias(kind, leading_trivias) + }; + self.eat_n_trivias(n_trivias - n_attached_trivias); + self.inner.start_internal(kind); + self.eat_n_trivias(n_attached_trivias); + } + + fn finish_branch(&mut self) { + match mem::replace(&mut self.state, State::PendingFinish) { + State::PendingStart => unreachable!(), + State::PendingFinish => self.inner.finish_internal(), + State::Normal => 
(), + } + } + + fn error(&mut self, error: ParseError) { + let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); + self.errors.push(error) + } +} + +impl<'a> TextTreeSink<'a> { + pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TextTreeSink<'a> { + TextTreeSink { + text, + tokens, + text_pos: 0.into(), + token_pos: 0, + state: State::PendingStart, + errors: Vec::new(), + inner: GreenNodeBuilder::new(), + } + } + + pub(super) fn finish(mut self) -> (GreenNode, Vec) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingFinish => { + self.eat_trivias(); + self.inner.finish_internal() + } + State::PendingStart | State::Normal => unreachable!(), + } + + (self.inner.finish(), self.errors) + } + + fn eat_trivias(&mut self) { + while let Some(&token) = self.tokens.get(self.token_pos) { + if !token.kind.is_trivia() { + break; + } + self.do_leaf(token.kind, token.len, 1); + } + } + + fn eat_n_trivias(&mut self, n: usize) { + for _ in 0..n { + let token = self.tokens[self.token_pos]; + assert!(token.kind.is_trivia()); + self.do_leaf(token.kind, token.len, 1); + } + } + + fn do_leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { + let range = TextRange::offset_len(self.text_pos, len); + let text: SmolStr = self.text[range].into(); + self.text_pos += len; + self.token_pos += n_tokens; + self.inner.leaf(kind, text); + } +} + +fn n_attached_trivias<'a>( + kind: SyntaxKind, + trivias: impl Iterator, +) -> usize { + match kind { + CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF + | MODULE | NAMED_FIELD_DEF => { + let mut res = 0; + for (i, (kind, text)) in trivias.enumerate() { + match kind { + WHITESPACE => { + if text.contains("\n\n") { + break; + } + } + COMMENT => { + res = i + 1; + } + _ => (), + } + } + res + } + _ => 0, + } +} -- cgit v1.2.3 From f078f7adc8ac0ffae07462d736083807c98c0483 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 16:55:01 +0300 
Subject: introduce tree builder --- crates/ra_syntax/src/lib.rs | 2 +- crates/ra_syntax/src/parsing/text_tree_sink.rs | 28 ++++++-------- crates/ra_syntax/src/syntax_node.rs | 51 ++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index e7d402446..7334d53ef 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs @@ -36,7 +36,7 @@ pub use crate::{ ast::AstNode, syntax_error::{SyntaxError, SyntaxErrorKind, Location}, syntax_text::SyntaxText, - syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc}, + syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc, SyntaxTreeBuilder}, ptr::{SyntaxNodePtr, AstPtr}, parsing::{tokenize, Token}, }; diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs index 8c1d78deb..961a91d41 100644 --- a/crates/ra_syntax/src/parsing/text_tree_sink.rs +++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs @@ -1,13 +1,12 @@ use std::mem; use ra_parser::{TreeSink, ParseError}; -use rowan::GreenNodeBuilder; use crate::{ - SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, + SmolStr, SyntaxError, TextUnit, TextRange, SyntaxTreeBuilder, SyntaxKind::{self, *}, parsing::Token, - syntax_node::{GreenNode, RaTypes}, + syntax_node::GreenNode, }; /// Bridges the parser with our specific syntax tree representation. 
@@ -19,8 +18,7 @@ pub(crate) struct TextTreeSink<'a> { text_pos: TextUnit, token_pos: usize, state: State, - errors: Vec, - inner: GreenNodeBuilder, + inner: SyntaxTreeBuilder, } enum State { @@ -33,7 +31,7 @@ impl<'a> TreeSink for TextTreeSink<'a> { fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { match mem::replace(&mut self.state, State::Normal) { State::PendingStart => unreachable!(), - State::PendingFinish => self.inner.finish_internal(), + State::PendingFinish => self.inner.finish_branch(), State::Normal => (), } self.eat_trivias(); @@ -48,12 +46,12 @@ impl<'a> TreeSink for TextTreeSink<'a> { fn start_branch(&mut self, kind: SyntaxKind) { match mem::replace(&mut self.state, State::Normal) { State::PendingStart => { - self.inner.start_internal(kind); + self.inner.start_branch(kind); // No need to attach trivias to previous node: there is no // previous node. return; } - State::PendingFinish => self.inner.finish_internal(), + State::PendingFinish => self.inner.finish_branch(), State::Normal => (), } @@ -73,21 +71,20 @@ impl<'a> TreeSink for TextTreeSink<'a> { n_attached_trivias(kind, leading_trivias) }; self.eat_n_trivias(n_trivias - n_attached_trivias); - self.inner.start_internal(kind); + self.inner.start_branch(kind); self.eat_n_trivias(n_attached_trivias); } fn finish_branch(&mut self) { match mem::replace(&mut self.state, State::PendingFinish) { State::PendingStart => unreachable!(), - State::PendingFinish => self.inner.finish_internal(), + State::PendingFinish => self.inner.finish_branch(), State::Normal => (), } } fn error(&mut self, error: ParseError) { - let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), self.text_pos); - self.errors.push(error) + self.inner.error(error, self.text_pos) } } @@ -99,8 +96,7 @@ impl<'a> TextTreeSink<'a> { text_pos: 0.into(), token_pos: 0, state: State::PendingStart, - errors: Vec::new(), - inner: GreenNodeBuilder::new(), + inner: SyntaxTreeBuilder::default(), } } @@ -108,12 +104,12 @@ impl<'a> 
TextTreeSink<'a> { match mem::replace(&mut self.state, State::Normal) { State::PendingFinish => { self.eat_trivias(); - self.inner.finish_internal() + self.inner.finish_branch() } State::PendingStart | State::Normal => unreachable!(), } - (self.inner.finish(), self.errors) + self.inner.finish_raw() } fn eat_trivias(&mut self) { diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs index 4d54ae614..e5b4cdb11 100644 --- a/crates/ra_syntax/src/syntax_node.rs +++ b/crates/ra_syntax/src/syntax_node.rs @@ -11,11 +11,12 @@ use std::{ borrow::Borrow, }; -use rowan::{Types, TransparentNewType}; +use ra_parser::ParseError; +use rowan::{Types, TransparentNewType, GreenNodeBuilder}; use crate::{ - SmolStr, SyntaxKind, TextRange, SyntaxText, SourceFile, AstNode, - syntax_error::SyntaxError, + SmolStr, SyntaxKind, TextUnit, TextRange, SyntaxText, SourceFile, AstNode, + syntax_error::{SyntaxError, SyntaxErrorKind}, }; pub use rowan::WalkEvent; @@ -276,3 +277,47 @@ fn has_short_text(kind: SyntaxKind) -> bool { _ => false, } } + +pub struct SyntaxTreeBuilder { + errors: Vec, + inner: GreenNodeBuilder, +} + +impl Default for SyntaxTreeBuilder { + fn default() -> SyntaxTreeBuilder { + SyntaxTreeBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } + } +} + +impl SyntaxTreeBuilder { + pub(crate) fn finish_raw(self) -> (GreenNode, Vec) { + let green = self.inner.finish(); + (green, self.errors) + } + + pub fn finish(self) -> TreeArc { + let (green, errors) = self.finish_raw(); + let node = SyntaxNode::new(green, errors); + if cfg!(debug_assertions) { + crate::validation::validate_block_structure(&node); + } + node + } + + pub fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { + self.inner.leaf(kind, text) + } + + pub fn start_branch(&mut self, kind: SyntaxKind) { + self.inner.start_internal(kind) + } + + pub fn finish_branch(&mut self) { + self.inner.finish_internal() + } + + pub fn error(&mut self, error: ParseError, text_pos: TextUnit) { + 
let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), text_pos); + self.errors.push(error) + } +} -- cgit v1.2.3 From 60373aa5047b350cd12fb1fa3c03f49b77d2448d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 14:06:10 +0300 Subject: add interface --- crates/ra_mbe/src/syntax_bridge.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 848c785f8..2c8ab302f 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -19,6 +19,11 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) Some((tt, token_map)) } +/// Parses the token tree (result of macro expansion) as a sequence of items +pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> ast::SourceFile { + unimplemented!() +} + impl TokenMap { pub fn relative_range_of(&self, tt: tt::TokenId) -> Option { let idx = tt.0 as usize; -- cgit v1.2.3 From e9cafafbc2108dd91ca687bc8c21bc6a7fe2dfff Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 14:07:29 +0300 Subject: add dependency on the parser --- Cargo.lock | 1 + crates/ra_mbe/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 5912659d4..e697de588 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1066,6 +1066,7 @@ dependencies = [ name = "ra_mbe" version = "0.1.0" dependencies = [ + "ra_parser 0.1.0", "ra_syntax 0.1.0", "ra_tt 0.1.0", "rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/crates/ra_mbe/Cargo.toml b/crates/ra_mbe/Cargo.toml index e7b8660e7..6e785f570 100644 --- a/crates/ra_mbe/Cargo.toml +++ b/crates/ra_mbe/Cargo.toml @@ -6,6 +6,7 @@ authors = ["rust-analyzer developers"] [dependencies] ra_syntax = { path = "../ra_syntax" } +ra_parser = { path = "../ra_parser" } tt = { path = "../ra_tt", package = "ra_tt" } rustc-hash = "1.0.0" -- cgit v1.2.3 From 10deefd3718b5665b4955908e057a48b7ad61464 Mon Sep 
17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 14:18:32 +0300 Subject: token source scaffold --- crates/ra_mbe/src/syntax_bridge.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 2c8ab302f..e64ba7ff2 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -1,5 +1,6 @@ +use ra_parser::TokenSource; use ra_syntax::{ - AstNode, SyntaxNode, TextRange, + AstNode, SyntaxNode, TextRange, SyntaxKind, ast, SyntaxKind::*, TextUnit }; @@ -89,3 +90,23 @@ fn convert_tt( let res = tt::Subtree { delimiter, token_trees }; Some(res) } + +struct TtTokenSource; + +impl TtTokenSource { + fn new(tt: &tt::Subtree) -> TtTokenSource { + unimplemented!() + } +} + +impl TokenSource for TtTokenSource { + fn token_kind(&self, pos: usize) -> SyntaxKind { + unimplemented!() + } + fn is_token_joint_to_next(&self, pos: usize) -> bool { + unimplemented!() + } + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + unimplemented!() + } +} -- cgit v1.2.3 From 71b8a874e7931e2213e3864e1eae90ceb2551fc2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 15:29:06 +0300 Subject: flatten tt --- crates/ra_mbe/src/syntax_bridge.rs | 61 ++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index e64ba7ff2..7a4ba9e93 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -1,6 +1,6 @@ use ra_parser::TokenSource; use ra_syntax::{ - AstNode, SyntaxNode, TextRange, SyntaxKind, + AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, ast, SyntaxKind::*, TextUnit }; @@ -91,22 +91,71 @@ fn convert_tt( Some(res) } -struct TtTokenSource; +struct TtTokenSource { + tokens: Vec<Tok>, +} + +struct Tok { + kind: SyntaxKind, + is_joint_to_next: bool, + text: Option<SmolStr>, +} impl TtTokenSource { fn
new(tt: &tt::Subtree) -> TtTokenSource { - unimplemented!() + let mut res = TtTokenSource { tokens: Vec::new() }; + res.convert_subtree(tt); + res + } + fn convert_subtree(&mut self, sub: &tt::Subtree) { + self.push_delim(sub.delimiter, false); + sub.token_trees.iter().for_each(|tt| self.convert_tt(tt)); + self.push_delim(sub.delimiter, true) + } + fn convert_tt(&mut self, tt: &tt::TokenTree) { + match tt { + tt::TokenTree::Leaf(leaf) => self.convert_leaf(leaf), + tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), + } + } + fn convert_leaf(&mut self, leaf: &tt::Leaf) { + let tok = match leaf { + tt::Leaf::Literal(l) => Tok { + kind: SyntaxKind::INT_NUMBER, // FIXME + is_joint_to_next: false, + text: Some(l.text.clone()), + }, + tt::Leaf::Punct(p) => Tok { + kind: SyntaxKind::from_char(p.char).unwrap(), + is_joint_to_next: p.spacing == tt::Spacing::Joint, + text: None, + }, + tt::Leaf::Ident(ident) => { + Tok { kind: IDENT, is_joint_to_next: false, text: Some(ident.text.clone()) } + } + }; + self.tokens.push(tok) + } + fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { + let kinds = match d { + tt::Delimiter::Parenthesis => [L_PAREN, R_PAREN], + tt::Delimiter::Brace => [L_CURLY, R_CURLY], + tt::Delimiter::Bracket => [L_BRACK, R_BRACK], + tt::Delimiter::None => return, + }; + let tok = Tok { kind: kinds[closing as usize], is_joint_to_next: false, text: None }; + self.tokens.push(tok) } } impl TokenSource for TtTokenSource { fn token_kind(&self, pos: usize) -> SyntaxKind { - unimplemented!() + self.tokens[pos].kind } fn is_token_joint_to_next(&self, pos: usize) -> bool { - unimplemented!() + self.tokens[pos].is_joint_to_next } fn is_keyword(&self, pos: usize, kw: &str) -> bool { - unimplemented!() + self.tokens[pos].text.as_ref().map(|it| it.as_str()) == Some(kw) } } -- cgit v1.2.3 From 83d6be6cecb19659e289eba63f12ac33dceb3b56 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 15:43:45 +0300 Subject: keep-text --- 
crates/ra_mbe/src/syntax_bridge.rs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 7a4ba9e93..24a043175 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -98,7 +98,7 @@ struct TtTokenSource { struct Tok { kind: SyntaxKind, is_joint_to_next: bool, - text: Option<SmolStr>, + text: SmolStr, } impl TtTokenSource { @@ -123,27 +123,34 @@ impl TtTokenSource { tt::Leaf::Literal(l) => Tok { kind: SyntaxKind::INT_NUMBER, // FIXME is_joint_to_next: false, - text: Some(l.text.clone()), + text: l.text.clone(), }, tt::Leaf::Punct(p) => Tok { kind: SyntaxKind::from_char(p.char).unwrap(), is_joint_to_next: p.spacing == tt::Spacing::Joint, - text: None, + text: { + let mut buf = [0u8; 4]; + let s: &str = p.char.encode_utf8(&mut buf); + SmolStr::new(s) + }, }, tt::Leaf::Ident(ident) => { - Tok { kind: IDENT, is_joint_to_next: false, text: Some(ident.text.clone()) } + Tok { kind: IDENT, is_joint_to_next: false, text: ident.text.clone() } } }; self.tokens.push(tok) } fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { - let kinds = match d { - tt::Delimiter::Parenthesis => [L_PAREN, R_PAREN], - tt::Delimiter::Brace => [L_CURLY, R_CURLY], - tt::Delimiter::Bracket => [L_BRACK, R_BRACK], + let (kinds, texts) = match d { + tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), + tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), + tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), tt::Delimiter::None => return, }; - let tok = Tok { kind: kinds[closing as usize], is_joint_to_next: false, text: None }; + let idx = closing as usize; + let kind = kinds[idx]; + let text = &texts[idx..texts.len() - (1 - idx)]; + let tok = Tok { kind, is_joint_to_next: false, text: SmolStr::new(text) }; self.tokens.push(tok) } } @@ -156,6 +163,6 @@ impl TokenSource for TtTokenSource { self.tokens[pos].is_joint_to_next } fn is_keyword(&self,
pos: usize, kw: &str) -> bool { - self.tokens[pos].text.as_ref().map(|it| it.as_str()) == Some(kw) + self.tokens[pos].text == *kw } } -- cgit v1.2.3 From 8eac450f41c6d94215f5d8c02235cd5917abaa69 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 17:21:56 +0300 Subject: implement tt -> ast --- crates/ra_mbe/src/lib.rs | 2 +- crates/ra_mbe/src/syntax_bridge.rs | 58 +++++++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index cdca3cafb..768f335fa 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -24,7 +24,7 @@ use ra_syntax::SmolStr; pub use tt::{Delimiter, Punct}; -pub use crate::syntax_bridge::ast_to_token_tree; +pub use crate::syntax_bridge::{ast_to_token_tree, token_tree_to_ast_item_list}; /// This struct contains AST for a single `macro_rules` definition. What might /// be very confusing is that AST has almost exactly the same shape as diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 24a043175..521b96d68 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -1,6 +1,6 @@ -use ra_parser::TokenSource; +use ra_parser::{TokenSource, TreeSink, ParseError}; use ra_syntax::{ - AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, + AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, ast, SyntaxKind::*, TextUnit }; @@ -21,8 +21,12 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) } /// Parses the token tree (result of macro expansion) as a sequence of items -pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> ast::SourceFile { - unimplemented!() +pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { + let token_source = TtTokenSource::new(tt); + let mut tree_sink = TtTreeSink::new(&token_source.tokens); + ra_parser::parse(&token_source, &mut tree_sink); + let syntax =
tree_sink.inner.finish(); + ast::SourceFile::cast(&syntax).unwrap().to_owned() } impl TokenMap { @@ -166,3 +170,49 @@ impl TokenSource for TtTokenSource { self.tokens[pos].text == *kw } } + +#[derive(Default)] +struct TtTreeSink<'a> { + buf: String, + tokens: &'a [Tok], + text_pos: TextUnit, + token_pos: usize, + inner: SyntaxTreeBuilder, +} + +impl<'a> TtTreeSink<'a> { + fn new(tokens: &'a [Tok]) -> TtTreeSink { + TtTreeSink { + buf: String::new(), + tokens, + text_pos: 0.into(), + token_pos: 0, + inner: SyntaxTreeBuilder::default(), + } + } +} + +impl<'a> TreeSink for TtTreeSink<'a> { + fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { + for _ in 0..n_tokens { + self.buf += self.tokens[self.token_pos].text.as_str(); + self.token_pos += 1; + } + self.text_pos += TextUnit::of_str(&self.buf); + let text = SmolStr::new(self.buf.as_str()); + self.buf.clear(); + self.inner.leaf(kind, text) + } + + fn start_branch(&mut self, kind: SyntaxKind) { + self.inner.start_branch(kind); + } + + fn finish_branch(&mut self) { + self.inner.finish_branch(); + } + + fn error(&mut self, error: ParseError) { + self.inner.error(error, self.text_pos) + } +} -- cgit v1.2.3 From 8cf156d85b776780d890762fb45a188dccc8510f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 23 Feb 2019 17:51:23 +0300 Subject: Add a test for macro parsing --- crates/ra_mbe/src/lib.rs | 61 ++++++++++++++++++++++++++++++++++++-- crates/ra_mbe/src/syntax_bridge.rs | 29 +++++++++++++----- 2 files changed, 80 insertions(+), 10 deletions(-) diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index 768f335fa..907402f5f 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -164,14 +164,18 @@ impl_froms!(TokenTree: Leaf, Subtree); crate::MacroRules::parse(&definition_tt).unwrap() } - fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) { + fn expand(rules: &MacroRules, invocation: &str) -> tt::Subtree { let source_file = ast::SourceFile::parse(invocation); let 
macro_invocation = source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); let (invocation_tt, _) = ast_to_token_tree(macro_invocation.token_tree().unwrap()).unwrap(); - let expanded = rules.expand(&invocation_tt).unwrap(); + rules.expand(&invocation_tt).unwrap() + } + + fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) { + let expanded = expand(rules, invocation); assert_eq!(expanded.to_string(), expansion); } @@ -268,4 +272,57 @@ impl_froms!(TokenTree: Leaf, Subtree); assert_expansion(&rules, "foo! { Foo,# Bar }", "struct Foo ; struct Bar ;"); } + #[test] + fn expand_to_item_list() { + let rules = create_rules( + " + macro_rules! structs { + ($($i:ident),*) => { + $(struct $i { field: u32 } )* + } + } + ", + ); + let expansion = expand(&rules, "structs!(Foo, Bar)"); + let tree = token_tree_to_ast_item_list(&expansion); + assert_eq!( + tree.syntax().debug_dump().trim(), + r#" +SOURCE_FILE@[0; 40) + STRUCT_DEF@[0; 20) + STRUCT_KW@[0; 6) + NAME@[6; 9) + IDENT@[6; 9) "Foo" + NAMED_FIELD_DEF_LIST@[9; 20) + L_CURLY@[9; 10) + NAMED_FIELD_DEF@[10; 19) + NAME@[10; 15) + IDENT@[10; 15) "field" + COLON@[15; 16) + PATH_TYPE@[16; 19) + PATH@[16; 19) + PATH_SEGMENT@[16; 19) + NAME_REF@[16; 19) + IDENT@[16; 19) "u32" + R_CURLY@[19; 20) + STRUCT_DEF@[20; 40) + STRUCT_KW@[20; 26) + NAME@[26; 29) + IDENT@[26; 29) "Bar" + NAMED_FIELD_DEF_LIST@[29; 40) + L_CURLY@[29; 30) + NAMED_FIELD_DEF@[30; 39) + NAME@[30; 35) + IDENT@[30; 35) "field" + COLON@[35; 36) + PATH_TYPE@[36; 39) + PATH@[36; 39) + PATH_SEGMENT@[36; 39) + NAME_REF@[36; 39) + IDENT@[36; 39) "u32" + R_CURLY@[39; 40)"# + .trim() + ); + } + } diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 521b96d68..3fe5abba3 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -129,17 +129,26 @@ impl TtTokenSource { is_joint_to_next: false, text: l.text.clone(), }, - tt::Leaf::Punct(p) => Tok { - kind: 
SyntaxKind::from_char(p.char).unwrap(), - is_joint_to_next: p.spacing == tt::Spacing::Joint, - text: { + tt::Leaf::Punct(p) => { + let kind = match p.char { + // lexer may produce compound tokens for these ones + '.' => DOT, + ':' => COLON, + '=' => EQ, + '!' => EXCL, + '-' => MINUS, + c => SyntaxKind::from_char(c).unwrap(), + }; + let text = { let mut buf = [0u8; 4]; let s: &str = p.char.encode_utf8(&mut buf); SmolStr::new(s) - }, - }, + }; + Tok { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } + } tt::Leaf::Ident(ident) => { - Tok { kind: IDENT, is_joint_to_next: false, text: ident.text.clone() } + let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); + Tok { kind, is_joint_to_next: false, text: ident.text.clone() } } }; self.tokens.push(tok) @@ -161,7 +170,11 @@ impl TtTokenSource { impl TokenSource for TtTokenSource { fn token_kind(&self, pos: usize) -> SyntaxKind { - self.tokens[pos].kind + if let Some(tok) = self.tokens.get(pos) { + tok.kind + } else { + SyntaxKind::EOF + } } fn is_token_joint_to_next(&self, pos: usize) -> bool { self.tokens[pos].is_joint_to_next -- cgit v1.2.3 From 81bca78349afb9e15994f46401da0cfabfba04a1 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 24 Feb 2019 13:14:14 +0300 Subject: rename --- crates/ra_mbe/src/syntax_bridge.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 3fe5abba3..c1472bbe5 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -96,10 +96,10 @@ fn convert_tt( } struct TtTokenSource { - tokens: Vec<Tok>, + tokens: Vec<TtToken>, } -struct Tok { +struct TtToken { kind: SyntaxKind, is_joint_to_next: bool, text: SmolStr, @@ -124,7 +124,7 @@ impl TtTokenSource { } fn convert_leaf(&mut self, leaf: &tt::Leaf) { let tok = match leaf { - tt::Leaf::Literal(l) => Tok { + tt::Leaf::Literal(l) => TtToken { kind: SyntaxKind::INT_NUMBER, // FIXME
is_joint_to_next: false, text: l.text.clone(), @@ -144,11 +144,11 @@ impl TtTokenSource { let s: &str = p.char.encode_utf8(&mut buf); SmolStr::new(s) }; - Tok { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } + TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } } tt::Leaf::Ident(ident) => { let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); - Tok { kind, is_joint_to_next: false, text: ident.text.clone() } + TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } } }; self.tokens.push(tok) @@ -163,7 +163,7 @@ impl TtTokenSource { let idx = closing as usize; let kind = kinds[idx]; let text = &texts[idx..texts.len() - (1 - idx)]; - let tok = Tok { kind, is_joint_to_next: false, text: SmolStr::new(text) }; + let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }; self.tokens.push(tok) } } @@ -187,14 +187,14 @@ impl TokenSource for TtTokenSource { #[derive(Default)] struct TtTreeSink<'a> { buf: String, - tokens: &'a [Tok], + tokens: &'a [TtToken], text_pos: TextUnit, token_pos: usize, inner: SyntaxTreeBuilder, } impl<'a> TtTreeSink<'a> { - fn new(tokens: &'a [Tok]) -> TtTreeSink { + fn new(tokens: &'a [TtToken]) -> TtTreeSink { TtTreeSink { buf: String::new(), tokens, -- cgit v1.2.3